Data Summary:

This dataset was generated by our group and can be downloaded from GEO (GSE146974) or from https://drive.google.com/file/d/1kR8Hhufoo2h2OtomW8n3kM0gaQhVS564/view?usp=sharing. It was generated from human peripheral blood mononuclear cells (PBMCs) by Ficoll separation followed by CD14 and CD16 positive cell selection. Since the CD14 and CD16 antibodies are not 100% specific, some T cells were also present in the scRNA-seq data. We performed clustering analysis using the Leiden algorithm for each batch and identified 288 T cells in total based on the T cell marker genes CD3D, CD3E and CD3G. After removing these 288 T cells, 10,878 cells and 21,289 genes remained for the rest of the analysis. The samples were processed and sequenced on three different days, resulting in three batches (3,640 cells in T1, 4,833 cells in T2 and 2,405 cells in T3).

Human monocyte preparation: Monocyte preparation uses a modification of published protocols. Briefly, ~20 ml of blood drawn in sodium heparin was processed immediately in the lab in the Clinical Research Center at Columbia University. PBMCs were isolated by Ficoll-Paque gradient centrifugation, which maintains cell viability and prevents ex vivo activation during cell recovery. Cells were stained with antibodies against human HLADR, CD14 and CD16, and monocyte subsets were defined as HLADR+CD14++CD16- (classical), HLADR+CD14++CD16+ (intermediate) and HLADR+CD14dim/CD16++ (nonclassical, patrolling monocyte). DAPI staining was used to exclude dead cells. Monocytes were sorted by a BD Influx Sorter into tubes for real-time 10x Genomics analysis.

1 Summary

Here I used monocle3 (monocle3_0.2.1) to conduct the pseudotime analysis.

CarDEC, scVI and DCA are all deep learning based methods. For each method, we used all genes as the input and evaluated the output in two ways. The first is the standard monocle3 pipeline (denoised counts -> normalization -> scaling -> PCA dimension reduction -> UMAP visualization based on the PCA dimension reduction). The second replaces the PCA step with the method's latent representation, so the UMAP visualization is based on the latent representation instead.

============================================================================================

# Session setup. Every path below is hard-coded to the original author's
# machine and has to be adapted before this notebook can run elsewhere.
#
# warn = -1 silences all warnings for the rest of the session, including ones
# that would point at real problems further down.
options(warn=-1) # turn off warning message globally
# Prepend a user library so packages installed there are found first.
.libPaths(c("/home/xiaoxiang/R/x86_64-pc-linux-gnu-library/3.5",.libPaths()))
# Python configuration for reticulate, set before library(reticulate) is called.
# NOTE(review): two different interpreters are named here (a conda env and the
# system /usr/bin/python3) -- confirm which one reticulate actually picks up.
Sys.setenv(RETICULATE_PYTHON_ENV="/home/xiaoxiang/anaconda3/envs/py36")#="/home/xiaoxiang/.conda/envs/DESCVIR"
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
#RETICULATE_PYTHON="/home/xiaoxiang/anaconda3/bin/python3",
# Drop an already-loaded Seurat; Convert_to_seurat3() loads its own copy from a
# separate library location later on.
# NOTE(review): the test looks at loaded namespaces, but detach() needs the
# package to be attached -- a namespace that is loaded without being attached
# would presumably make this line fail; confirm.
if ("Seurat" %in% loadedNamespaces()) detach("package:Seurat",unload = T)
# Load the sf shared library directly.
# NOTE(review): presumably a workaround for a dependency that fails to load sf
# on its own -- confirm it is still required.
dyn.load("/home/xiaoxiang/R/x86_64-pc-linux-gnu-library/3.5/sf/libs/sf.so")
#suppressPackageStartupMessages(library(monocle,lib.loc = "/usr/lib/R/monocle_alpha"))# devtools::install_github("")
#devtools::install_github("cole-trapnell-lab/DDRTree", ref="simple-ppt-like",lib="/usr/lib/R/monocle_alpha")
#devtools::install_github("r-spatial/sf") if 
#install.packages("~/Downloads/monocle-release-monocle3_alpha/", repos = NULL,lib = "/usr/lib/R/monocle_alpha")
suppressPackageStartupMessages(library(reticulate))
#suppressPackageStartupMessages(library(devtools))
suppressPackageStartupMessages(library(monocle3))
#suppressPackageStartupMessages(library(flexclust))
#suppressPackageStartupMessages(library(mcclust))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggjoy))
suppressPackageStartupMessages(library(VGAM))
suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(kableExtra))
#py_install('umap-learn', pip = T, pip_ignore_installed = T)
#import("leiden")
#fig_path="/home/xiaoxiang/Documents/DESC_paper_prepare/DESC_paper_final/formal_revised/figures_sep/"
# Directory prefix for data files.
datadirpath <- "./"
# Collects one data frame of pseudotime/expression values per analysis section.
df_pseudotime_list <- list()
# Load necessary helper functions (disabled; kept for reference).
#source("/media/xiaoxiang/D/DESC_reproducible_file/helpfunc_new.R")
#source("/media/xiaoxiang/D/Upenn_computer_backup/Documents/Human_Heart_Project/heart/Heart_result_updated/helpfunc_new.R")
# Global ggplot2 theme: black-and-white base with white facet strips and no
# panel background, legend background or grid lines. `old` holds the theme
# that was active before, as returned by theme_set().
old <- theme_set(
  theme_bw() +
    theme(
      strip.background = element_rect(fill = "white"),
      panel.background = element_blank(),
      legend.background = element_blank(),
      panel.grid = element_blank()
    )
)
BatchKL <- function(df,dimensionData=NULL,replicates=200,n_neighbors=100,n_cells=100,batch="BatchID"){
  # Bootstrap estimate of how well batches are mixed in a low-dimensional
  # embedding, measured as a KL divergence (log base 2).
  #
  # For each replicate, a random set of cells is drawn. For every drawn cell
  # the batch composition of its nearest neighbours is compared with the batch
  # composition of the whole data set. Values close to 0 mean the local
  # neighbourhoods mirror the global batch proportions.
  #
  # Args:
  #   df:            data frame with one row per cell; must contain the column
  #                  named by `batch`, and "tSNE_1"/"tSNE_2" when
  #                  `dimensionData` is NULL.
  #   dimensionData: optional matrix-like embedding (rows = cells, in the same
  #                  order as `df`) used instead of the tSNE columns.
  #   replicates:    number of bootstrap replicates.
  #   n_neighbors:   upper bound on the neighbourhood size. The size actually
  #                  used is min(5 * number of batches, n_neighbors), further
  #                  capped at the number of cells.
  #   n_cells:       number of cells drawn (without replacement) per replicate;
  #                  capped at the number of cells available.
  #   batch:         name of the batch column in `df`.
  #
  # Returns:
  #   Numeric vector of length `replicates`; each entry is the mean KL
  #   divergence over the cells drawn in that replicate.
  if (is.null(dimensionData)) {
    embedding <- as.matrix(df[, c("tSNE_1", "tSNE_2")])
  } else {
    embedding <- as.matrix(dimensionData)
  }

  n_total <- nrow(df)
  if (n_total < 1L) {
    stop("BatchKL: 'df' has no rows", call. = FALSE)
  }

  # Batch labels as a factor so every table() below reports all batches in the
  # same order, including batches absent from a neighbourhood.
  batch_labels <- factor(as.vector(df[, batch]))
  batch_counts <- as.matrix(table(batch_labels))[, 1]
  global_prop <- batch_counts / sum(batch_counts)

  # Neither the sample size nor the neighbourhood size may exceed the number
  # of cells; uncapped values made sample()/knn() fail on small inputs.
  n_draw <- min(n_cells, n_total)
  k_use <- min(5 * length(global_prop), n_neighbors, n_total)

  vapply(seq_len(replicates), function(rep_i) {
    picked <- sample.int(n_total, n_draw)
    # drop = FALSE keeps the query a matrix even when a single cell is drawn.
    nearest <- nabor::knn(embedding, embedding[picked, , drop = FALSE], k = k_use)
    kl_per_cell <- vapply(seq_along(picked), function(j) {
      local_counts <- as.matrix(table(batch_labels[nearest$nn.idx[j, ]]))[, 1]
      local_prop <- local_counts / sum(local_counts)
      # Batches missing from the neighbourhood give 0 * log2(0) = NaN, which
      # na.rm drops, i.e. they contribute nothing to the sum.
      sum(local_prop * log2(local_prop / global_prop), na.rm = TRUE)
    }, numeric(1))
    mean(kl_per_cell, na.rm = TRUE)
  }, numeric(1))
}
Convert_to_seurat3 <- function(adata) {
  # Build a Seurat object from a Python AnnData object held through reticulate.
  #
  # Args:
  #   adata: un-converted Python object exposing $X, $obs and $var.
  #          Assumes $X is a scipy sparse matrix with cells in rows (it is
  #          transposed and converted with $T$tocsc()) and that $obs has a
  #          `cellname` column -- TODO confirm against the .h5ad file.
  #
  # Returns:
  #   The result of CreateSeuratObject() on the genes x cells matrix, with
  #   the $obs columns (minus "n_genes" and "n_counts") as cell metadata.

  # Seurat is loaded from a dedicated library location when the function runs.
  suppressPackageStartupMessages(library("Seurat",lib.loc = "/usr/lib/R/self_library/"))

  expr_mat <- py_to_r(adata$X$T$tocsc())
  obs_df <- py_to_r(adata$obs)
  var_df <- py_to_r(adata$var)

  colnames(expr_mat) <- obs_df$cellname
  rownames(expr_mat) <- rownames(var_df)

  # Drop the two QC columns from the metadata handed to Seurat.
  is_qc_col <- colnames(obs_df) %in% c("n_genes", "n_counts")
  CreateSeuratObject(
    expr_mat,
    meta.data = obs_df[, !is_qc_col, drop = FALSE],
    min.features = 1
  )
}
getwd()
[1] "/home/xiaoxiang/Documents/carDEC_paper/CarDEC_new20200421/results_rmarkdown"
get_plot4 <- function(df00) {
  # Four-panel summary of FCGR3A and S100A8 for one pseudotime result.
  #
  # Args:
  #   df00: data frame with the columns UMAP_1, UMAP_2, pseudotime, BatchID,
  #         FCGR3A and S100A8 (all are referenced by the aesthetics below).
  #
  # Returns:
  #   The object built by egg::ggarrange(..., draw = FALSE): a single row of
  #   four panels in the order FCGR3A on UMAP, FCGR3A vs pseudotime,
  #   S100A8 on UMAP, S100A8 vs pseudotime.

  # Grey-to-red colour bar with the legend on top (both UMAP panels).
  feature_style <- function() {
    list(
      scale_color_gradient(low = "grey", high = "red"),
      theme(legend.position = "top"),
      guides(color = guide_colorbar(title.vjust = 0.7))
    )
  }

  # Cells coloured by batch, one GAM trend per batch and a thin black GAM
  # trend over all cells (both expression-vs-pseudotime panels).
  trend_layers <- function() {
    list(
      geom_point(aes(color = BatchID), size = 0.01),
      guides(color = guide_legend(override.aes = list(size = 5))),
      geom_smooth(aes(color = BatchID), method = "gam", formula = y ~ s(x, bs = "cs")),
      geom_smooth(color = "black", method = "gam", formula = y ~ s(x, bs = "cs"), size = 0.5),
      ggtitle(""),
      xlab("Pseudotime")
    )
  }

  # Shared theme of the trend panels; extra theme elements go in via `...`.
  trend_theme <- function(...) {
    theme(
      legend.position = "top",
      plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
      legend.text = element_text(size = 15, face = "bold"),
      ...,
      legend.title = element_blank()
    )
  }

  umap_fcgr3a <- ggplot() +
    geom_point(data = df00, aes(x = UMAP_1, y = UMAP_2, color = FCGR3A), size = 0.01) +
    feature_style()

  umap_s100a8 <- ggplot() +
    geom_point(data = df00, aes(x = UMAP_1, y = UMAP_2, color = S100A8), size = 0.01) +
    feature_style()

  # Only the FCGR3A trend panel carries the extra 1 cm right margin.
  trend_fcgr3a <- ggplot(data = df00, aes(x = pseudotime, y = FCGR3A)) +
    trend_layers() +
    trend_theme(plot.margin = unit(c(0, 1, 0, 0), "cm")) +
    scale_color_brewer(palette = "Set2")

  trend_s100a8 <- ggplot(data = df00, aes(x = pseudotime, y = S100A8)) +
    trend_layers() +
    trend_theme() +
    scale_color_brewer(palette = "Set2")

  egg::ggarrange(umap_fcgr3a, trend_fcgr3a, umap_s100a8, trend_s100a8, ncol = 4, draw = FALSE)
}
get_plot4_sep <- function(df00) {
  # Same four panels as get_plot4(), returned separately instead of arranged.
  #
  # Args:
  #   df00: data frame with the columns UMAP_1, UMAP_2, pseudotime, BatchID,
  #         FCGR3A and S100A8 (all are referenced by the aesthetics below).
  #
  # Returns:
  #   An unnamed list of four ggplot objects in the order: FCGR3A on UMAP,
  #   S100A8 on UMAP, FCGR3A vs pseudotime, S100A8 vs pseudotime.

  # Grey-to-red colour bar with the legend on top (both UMAP panels).
  feature_style <- function() {
    list(
      scale_color_gradient(low = "grey", high = "red"),
      theme(legend.position = "top"),
      guides(color = guide_colorbar(title.vjust = 0.7))
    )
  }

  # Everything the two trend panels share: cells coloured by batch, one GAM
  # trend per batch, a thin black GAM trend over all cells, theme and palette.
  trend_style <- function() {
    list(
      geom_point(aes(color = BatchID), size = 0.05),
      guides(color = guide_legend(override.aes = list(size = 5))),
      geom_smooth(aes(color = BatchID), method = "gam", formula = y ~ s(x, bs = "cs")),
      geom_smooth(color = "black", method = "gam", formula = y ~ s(x, bs = "cs"), size = 0.5),
      ggtitle(""),
      xlab("Pseudotime"),
      theme(
        legend.position = "top",
        plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
        legend.text = element_text(size = 15, face = "bold"),
        legend.title = element_blank()
      ),
      scale_color_brewer(palette = "Set2")
    )
  }

  umap_fcgr3a <- ggplot() +
    geom_point(data = df00, aes(x = UMAP_1, y = UMAP_2, color = FCGR3A), size = 0.01) +
    feature_style()

  umap_s100a8 <- ggplot() +
    geom_point(data = df00, aes(x = UMAP_1, y = UMAP_2, color = S100A8), size = 0.01) +
    feature_style()

  trend_fcgr3a <- ggplot(data = df00, aes(x = pseudotime, y = FCGR3A)) +
    trend_style()

  trend_s100a8 <- ggplot(data = df00, aes(x = pseudotime, y = S100A8)) +
    trend_style()

  list(umap_fcgr3a, umap_s100a8, trend_fcgr3a, trend_s100a8)
}
ad=import("anndata",convert = FALSE)
/home/xiaoxiang/.local/lib/python3.6/site-packages/dask/config.py:161: YAMLLoadWarning: calling yaml.load() without Loader=... is deprecated, as the default Loader is unsafe. Please read https://msg.pyyaml.org/load for full details.
  data = yaml.load(f.read()) or {}
# Read the AnnData file and turn it into a log-normalised Seurat object.
adata <- ad$read_h5ad("../../dca_test.h5ad")
obj0 <- NormalizeData(Convert_to_seurat3(adata), verbose = FALSE)
# Count matrix of the RNA assay, used as input for the raw-data analyses.
raw.data <- obj0@assays$RNA@counts
# Lookup from the batch label stored in the data to the short names T1-T3.
maprules <- c(
  "2017_0801" = "T1",
  "2017_1017" = "T2",
  "2017_1120" = "T3"
)
maprules
2017_0801 2017_1017 2017_1120 
     "T1"      "T2"      "T3" 

Here we compared different methods, including DCA and scVI.

# Gene table written by the CarDEC run; only the rows flagged as highly
# variable ("HVG", the top 2000 genes) are kept.
hvg_genes <- read.table(
  "../final_processed_results/CarDEC_hvg_used.tsv",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)
is_hvg <- hvg_genes$Variance.Type == "HVG"
hvg_genes <- hvg_genes[is_hvg & !is.na(is_hvg), , drop = FALSE]
rm(is_hvg)

2 Monocle3 for raw data

2.1 HVGs raw

rr cell.meta.data=obj0@meta.data cell.meta.data\(dataset_batch=plyr::mapvalues(cell.meta.data\)batch_label,names(maprules),maprules) gene_ann=data.frame(gene_short_name = make.unique(rownames(raw.data)),row.names = make.unique(rownames(raw.data))) #pd <- new(,data=cell.meta.data) #fd <- new(,data=gene_ann) cds <- new_cell_data_set(raw.data[rownames(raw.data)%in%hvg_genes$genename,], cell_metadata = cell.meta.data, gene_metadata =gene_ann[gene_ann\(gene_short_name%in%hvg_genes\)genename,,drop=F]) ## Step 1: Normalize and pre-process the data cds <- preprocess_cds(cds, num_dim = 32,method=,norm_method=,verbose = F)

multiple methods tables found for ‘type’

rr ## Step 2: Remove batch effects with cell alignment ##cds <- align_cds(cds, alignment_group = , residual_model_formula_str = NULL) ## Step 3: Reduce the dimensions using UMAP cds <- reduce_dimension(cds,reduction_method = ,preprocess_method=,verbose = F) ## Step 4: Cluster the cells cds <- cluster_cells(cds,reduction_method =,cluster_method = ,verbose = F) # Construct the graph # Note that, for the rest of the code to run, the graph should be fully (partionly) connected ## Step 5: Learn a graph cds <- learn_graph(cds, use_partition = T,verbose = F)


  |                                                                                                                                                
  |                                                                                                                                          |   0%
  |                                                                                                                                                
  |==========================================================================================================================================| 100%

  |                                                                                                                                                
  |                                                                                                                                          |   0%
  |                                                                                                                                                
  |==========================================================================================================================================| 100%

rr colData(cds)\(clusters=cds@clusters\)UMAP$clusters p1=plot_cells(cds,color_cells_by = ,label_cell_groups = F)+theme(legend.position = ) p2=plot_cells(cds,color_cells_by = ,label_cell_groups=F,graph_label_size=2, label_leaves=F,label_branch_points=F)+theme(legend.position = ) p=cowplot::plot_grid(p1,p2,align = ,ncol = 3)

rr p

rr ## Step 6: Order cells # a helper function to identify the root principal points: get_earliest_principal_node <- function(cds, cluster=c(,)){ root_pr_nodes=sapply(cluster,function(ii){ cell_ids <- which(colData(cds)[, ] %in%ii)

closest_vertex <-cds@principal_graph_aux[[]]$pr_graph_cell_proj_closest_vertex

closest_vertex <- as.matrix(closest_vertex[colnames(cds), ]) root_pr_nodes <-igraph::V(principal_graph(cds)[[]])$name[as.numeric(names(which.max(table(closest_vertex[cell_ids,]))))] }) root_pr_nodes } # root cells ids=get_earliest_principal_node(cds,cluster=c(,,)) cds <- order_cells(cds,root_pr_nodes = ids) #plot_cells(cds,color_cells_by = )

rr colData(cds)\(pseudotime=pseudotime(cds) colData(cds)\)Pseudotime=colData(cds)\(pseudotime/max(colData(cds)\)pseudotime,na.rm = T) df_den=pData(cds)[,c(,_batch)] df_den=as.data.frame(df_den[!is.infinite(df_den$Pseudotime),]) set.seed(10) theme_use=theme(legend.text = element_text(size=16), legend.title = element_text(size=20)) p_ori_1=plot_cells(cds,color_cells_by = _batch,graph_label_size=0,alpha=1,cell_size = 0.6)+ guides(colour = guide_legend(override.aes = list(alpha=0.7, size=5)))+ theme_use+ theme(legend.position = )+theme(legend.title = element_blank())

p_ori_2=plot_cells(cds,color_cells_by = ,label_branch_points=T,graph_label_size=2,alpha=1,cell_size = 0.6)+ theme(legend.position = , legend.title = element_text(vjust = 0.2), legend.text = element_text(angle=-50 ), legend.key.height = unit(0.5,), legend.key.width = unit(1,))+ guides(color = guide_colourbar(label.position = ))+theme_use

Cells aren't colored in a way that allows them to be grouped.

rr p_ori_3=ggplot(data=df_den)+geom_density(aes(x=Pseudotime,fill=dataset_batch),alpha=0.7)+ scale_y_continuous(expand = c(0,0))+ scale_x_continuous(expand = c(0,0))+ theme(legend.position=)+theme_use p_monocle_ori=egg::ggarrange(p_ori_1,p_ori_2,p_ori_3,ncol=3,draw=F)

rr # printed how many cells with no pseudotime table(as.numeric(is.infinite(pData(cds)[,c()]))) #0 mean normal pseudotime and 1 means infinity.


    0 
10878 

rr p_monocle_ori

rr cds_exprs=FetchData(obj0,vars = c(3A,100A8)) df0=data.frame(cbind(pseudotime=pData(cds)\(Pseudotime,cds_exprs)) #cds_exprs=as.matrix(SingleCellExperiment::counts(cds)[c(\FCGR3A\,\S100A8\),]) #df0=data.frame(cbind(pseudotime=pData(cds)\)Pseudotime,log1p(t(cds_exprs)/size_factors(cds)))) df0\(UMAP_1=reducedDims(cds)\)UMAP[,1] df0\(UMAP_2=reducedDims(cds)\)UMAP[,2] df0\(BatchID=pData(cds)\)dataset_batch df0=df0[is.finite(df0$pseudotime),] df0=df0[order(df0$pseudotime,decreasing = F),,drop=F] df0\(x=df0\)pseudotime/max(df0\(pseudotime) df_pseudotime_list\)raw=df0

  • Feature plots of FCGR3A and S100A8

rr p=get_plot4(df00 = df0)

rr p

rr df_den=colData(cds) tt1=ks.test(df_den\(Pseudotime[df_den\)dataset_batch==1],df_den\(Pseudotime[df_den\)dataset_batch==3])

p-value will be approximate in the presence of ties

rr tt1


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == \T1\] and df_den$Pseudotime[df_den$dataset_batch == \T3\]
D = 0.57211, p-value < 2.2e-16
alternative hypothesis: two-sided

rr tt2=ks.test(df_den\(Pseudotime[df_den\)dataset_batch==1],df_den\(Pseudotime[df_den\)dataset_batch==2])

p-value will be approximate in the presence of ties

rr tt2


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == \T1\] and df_den$Pseudotime[df_den$dataset_batch == \T2\]
D = 0.4611, p-value < 2.2e-16
alternative hypothesis: two-sided

rr tt3=ks.test(df_den\(Pseudotime[df_den\)dataset_batch==2],df_den\(Pseudotime[df_den\)dataset_batch==3]) tt3


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == \T2\] and df_den$Pseudotime[df_den$dataset_batch == \T3\]
D = 0.70922, p-value < 2.2e-16
alternative hypothesis: two-sided

rr Stable5[1,2:4]=matrix(get_p_new(c(tt1\(p.value,tt2\)p.value,tt3\(p.value),c(tt1\)statistic,tt2\(statistic,tt3\)statistic)),1,3)

2.2 All genes raw

# Per-cell metadata from the Seurat object, plus the short batch name
# (T1/T2/T3) looked up through `maprules`.
cell.meta.data <- obj0@meta.data
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  names(maprules),
  maprules
)
# Gene annotation keyed by de-duplicated gene names.
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(raw.data)),
  row.names = make.unique(rownames(raw.data))
)
# pd <- new("AnnotatedDataFrame", data = cell.meta.data)
# fd <- new("AnnotatedDataFrame", data = gene_ann)
cds <- new_cell_data_set(
  raw.data,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data (log normalisation, 32 PCs)
cds <- preprocess_cds(
  cds,
  num_dim = 32,
  method = "PCA",
  norm_method = "log",
  verbose = FALSE
)

## Step 2: Remove batch effects with cell alignment (left commented out)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP, computed on the PCA space
cds <- reduce_dimension(
  cds,
  reduction_method = "UMAP",
  preprocess_method = "PCA",
  verbose = FALSE
)

## Step 4: Cluster the cells (Leiden on the UMAP embedding)
cds <- cluster_cells(
  cds,
  reduction_method = "UMAP",
  cluster_method = "leiden",
  verbose = FALSE
)

## Step 5: Learn the principal graph
# Note: for the rest of the code to run, the graph should be fully (or, with
# partitions, partially) connected.
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

# Copy the cluster labels into colData so they can be used for colouring.
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)

rr p

rr ## Step 6: Order cells # a helper function to identify the root principal points: get_earliest_principal_node <- function(cds, cluster=c(,)){ root_pr_nodes=sapply(cluster,function(ii){ cell_ids <- which(colData(cds)[, ] %in%ii)

closest_vertex <-cds@principal_graph_aux[[]]$pr_graph_cell_proj_closest_vertex

closest_vertex <- as.matrix(closest_vertex[colnames(cds), ]) root_pr_nodes <-igraph::V(principal_graph(cds)[[]])$name[as.numeric(names(which.max(table(closest_vertex[cell_ids,]))))] }) root_pr_nodes } # root cells ids=get_earliest_principal_node(cds,cluster=c(,)) cds <- order_cells(cds,root_pr_nodes = ids) #plot_cells(cds,color_cells_by = )

rr colData(cds)\(pseudotime=pseudotime(cds) colData(cds)\)Pseudotime=colData(cds)\(pseudotime/max(colData(cds)\)pseudotime,na.rm = T) df_den=pData(cds)[,c(,_batch)] df_den=as.data.frame(df_den[!is.infinite(df_den$Pseudotime),]) set.seed(10) theme_use=theme(legend.text = element_text(size=16), legend.title = element_text(size=20)) p_ori_all_1=plot_cells(cds,color_cells_by = _batch,graph_label_size=0,alpha=1,cell_size = 0.6)+ guides(colour = guide_legend(override.aes = list(alpha=0.7, size=5)))+ theme_use+ theme(legend.position = )+theme(legend.title = element_blank())

p_ori_all_2=plot_cells(cds,color_cells_by = ,label_branch_points=T,graph_label_size=2,alpha=1,cell_size = 0.6)+ theme(legend.position = , legend.title = element_text(vjust = 0.2), legend.text = element_text(angle=-50 ), legend.key.height = unit(0.5,), legend.key.width = unit(1,))+ guides(color = guide_colourbar(label.position = ))+theme_use

Cells aren't colored in a way that allows them to be grouped.

rr p_ori_all_3=ggplot(data=df_den)+geom_density(aes(x=Pseudotime,fill=dataset_batch),alpha=0.7)+ scale_y_continuous(expand = c(0,0))+ scale_x_continuous(expand = c(0,0))+ theme(legend.position=)+theme_use p_monocle_ori_all=egg::ggarrange(p_ori_all_1,p_ori_all_2,p_ori_all_3,ncol=3,draw=F)

rr # printed how many cells with no pseudotime table(as.numeric(is.infinite(pData(cds)[,c()]))) #0 mean normal pseudotime and 1 means infinity.


    0 
10878 

rr p_monocle_ori_all

rr cds_exprs=FetchData(obj0,vars = c(3A,100A8)) df0=data.frame(cbind(pseudotime=pData(cds)\(Pseudotime,cds_exprs)) #cds_exprs=as.matrix(SingleCellExperiment::counts(cds)[c(\FCGR3A\,\S100A8\),]) #df0=data.frame(cbind(pseudotime=pData(cds)\)Pseudotime,log1p(t(cds_exprs)/size_factors(cds)))) df0\(UMAP_1=reducedDims(cds)\)UMAP[,1] df0\(UMAP_2=reducedDims(cds)\)UMAP[,2] df0\(BatchID=pData(cds)\)dataset_batch df0=df0[is.finite(df0$pseudotime),] df0=df0[order(df0$pseudotime,decreasing = F),,drop=F] df0\(x=df0\)pseudotime/max(df0\(pseudotime) df_pseudotime_list\)raw_all=df0

  • Feature plots of FCGR3A and S100A8

rr p=get_plot4(df00 = df0)

rr p

rr df_den=colData(cds) tt1=ks.test(df_den\(Pseudotime[df_den\)dataset_batch==1],df_den\(Pseudotime[df_den\)dataset_batch==3])

p-value will be approximate in the presence of ties

rr tt1


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == \T1\] and df_den$Pseudotime[df_den$dataset_batch == \T3\]
D = 0.92486, p-value < 2.2e-16
alternative hypothesis: two-sided

rr tt2=ks.test(df_den\(Pseudotime[df_den\)dataset_batch==1],df_den\(Pseudotime[df_den\)dataset_batch==2]) tt2


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == \T1\] and df_den$Pseudotime[df_den$dataset_batch == \T2\]
D = 0.68425, p-value < 2.2e-16
alternative hypothesis: two-sided

rr tt3=ks.test(df_den\(Pseudotime[df_den\)dataset_batch==2],df_den\(Pseudotime[df_den\)dataset_batch==3])

p-value will be approximate in the presence of ties

rr tt3


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == \T2\] and df_den$Pseudotime[df_den$dataset_batch == \T3\]
D = 0.77509, p-value < 2.2e-16
alternative hypothesis: two-sided

rr Stable5[2,2:4]=matrix(get_p_new(c(tt1\(p.value,tt2\)p.value,tt3\(p.value),c(tt1\)statistic,tt2\(statistic,tt3\)statistic)),1,3)

3 Monocle3 using carDEC

In this section, we will evaluate the performance of carDEC.

Note that carDEC was run on all genes; the HVGs were then extracted from its output for the evaluation.

rr adata=ad$read_h5ad(../final_processed_results/CarDEC Results/adata_CarDEC.h5ad)

rr cell.meta.data=py_to_r(adata\(obs) cell.meta.data\)dataset_batch=plyr::mapvalues(cell.meta.data\(batch_label,names(maprules),maprules) gene_ann0=py_to_r(adata\)var) gene_ann=data.frame(gene_short_name = make.unique(rownames(gene_ann0)), VarianceType=gene_ann0\(`Variance Type`, row.names = make.unique(rownames(gene_ann0))) mtx=t(py_to_r(adata\)layers[‘denoised counts’])) colnames(mtx)=cell.meta.data$cellname rownames(mtx)=rownames(gene_ann) mtx_sizefactor=1e4/colSums(mtx)

3.1 Using latent

cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data (log normalisation, 32 PCs)
cds <- preprocess_cds(
  cds,
  num_dim = 32,
  method = "PCA",
  norm_method = "log",
  verbose = FALSE
)

# Overwrite the PCA slot with the "embedding" stored in the AnnData object,
# so the UMAP below is computed on that latent representation instead of on
# the principal components.
tmp0 <- py_to_r(adata$obsm["embedding"])
colnames(tmp0) <- paste0("PC", seq_len(ncol(tmp0)))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (left commented out)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP, computed on the "PCA" slot
cds <- reduce_dimension(
  cds,
  reduction_method = "UMAP",
  preprocess_method = "PCA",
  verbose = FALSE
)

## Step 4: Cluster the cells (Leiden on the UMAP embedding)
cds <- cluster_cells(
  cds,
  reduction_method = "UMAP",
  cluster_method = "leiden",
  verbose = FALSE
)

## Step 5: Learn the principal graph
# Note: for the rest of the code to run, the graph should be fully (or, with
# partitions, partially) connected.
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

# Copy the cluster labels into colData so they can be used for colouring.
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)

rr p

rr ## Step 6: Order cells # root cells ids=get_earliest_principal_node(cds,cluster=c()) cds <- order_cells(cds, root_pr_nodes=ids) plot_cells(cds,color_cells_by = )

Cells aren't colored in a way that allows them to be grouped.

rr colData(cds)\(pseudotime=pseudotime(cds) colData(cds)\)Pseudotime=colData(cds)\(pseudotime/max(colData(cds)\)pseudotime,na.rm = T) df_den=pData(cds)[,c(,_batch)] df_den=as.data.frame(df_den[!is.infinite(df_den$Pseudotime),]) set.seed(10) theme_use=theme(legend.text = element_text(size=16), legend.title = element_text(size=20)) p_carDEC_latent_1=plot_cells(cds,color_cells_by = _batch,,graph_label_size=0,alpha=1,cell_size = 0.6)+ guides(colour = guide_legend(override.aes = list(alpha=0.7, size=5)))+ theme_use+ theme(legend.position = )

p_carDEC_latent_2=plot_cells(cds,color_cells_by = ,label_branch_points=T,graph_label_size=2,alpha=1,cell_size = 0.6)+ theme(legend.position = , legend.title = element_text(vjust = 0.2), legend.text = element_text(angle=-50 ), legend.key.height = unit(0.5,), legend.key.width = unit(1,))+ guides(color = guide_colourbar(label.position = ))+theme_use

Cells aren't colored in a way that allows them to be grouped.

rr p_carDEC_latent_3=ggplot(data=df_den)+geom_density(aes(x=Pseudotime,fill=dataset_batch),alpha=0.7)+ scale_y_continuous(expand = c(0,0))+ scale_x_continuous(expand = c(0,0))+ theme(legend.position=)+theme_use p_monocle_carDEC_latent=egg::ggarrange(p_carDEC_latent_1,p_carDEC_latent_2,p_carDEC_latent_3,ncol=3,draw=F)

rr p_monocle_carDEC_latent

rr #cds_exprs=FetchData(obj0,vars = c(3A,100A8)) #df0=data.frame(cbind(pseudotime=pData(cds)\(Pseudotime,cds_exprs))=1e4/rowSums(mtx) cds_exprs=as.matrix(SingleCellExperiment::counts(cds)[c(\FCGR3A\,\S100A8\),]) df0=data.frame(cbind(pseudotime=pData(cds)\)Pseudotime,log1p(t(cds_exprs)*mtx_sizefactor))) df0\(UMAP_1=reducedDims(cds)\)UMAP[,1] df0\(UMAP_2=reducedDims(cds)\)UMAP[,2] df0\(BatchID=pData(cds)\)dataset_batch df0=df0[is.finite(df0$pseudotime),] df0=df0[order(df0$pseudotime,decreasing = F),,drop=F] df0\(x=df0\)pseudotime/max(df0\(pseudotime) df_pseudotime_list\)carDEC_latent=df0

  • Feature plots of FCGR3A and S100A8

rr p=get_plot4(df00 = df0)

rr p

rr df_den=colData(cds) tt1=ks.test(df_den\(Pseudotime[df_den\)dataset_batch==1],df_den\(Pseudotime[df_den\)dataset_batch==3]) tt1


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == \T1\] and df_den$Pseudotime[df_den$dataset_batch == \T3\]
D = 0.051982, p-value = 0.0007981
alternative hypothesis: two-sided

rr tt2=ks.test(df_den\(Pseudotime[df_den\)dataset_batch==1],df_den\(Pseudotime[df_den\)dataset_batch==2]) tt2


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == \T1\] and df_den$Pseudotime[df_den$dataset_batch == \T2\]
D = 0.033595, p-value = 0.01843
alternative hypothesis: two-sided

rr tt3=ks.test(df_den\(Pseudotime[df_den\)dataset_batch==2],df_den\(Pseudotime[df_den\)dataset_batch==3]) tt3


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == \T2\] and df_den$Pseudotime[df_den$dataset_batch == \T3\]
D = 0.052437, p-value = 0.0002922
alternative hypothesis: two-sided

rr Stable5[3,2:4]=matrix(get_p_new(c(tt1\(p.value,tt2\)p.value,tt3\(p.value),c(tt1\)statistic,tt2\(statistic,tt3\)statistic)),1,3)

3.2 HVGs denoised

# Restrict the denoised matrix and the gene annotation to the genes whose
# VarianceType is "HVG".
cds <- new_cell_data_set(
  mtx[gene_ann$VarianceType == "HVG", ],
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann[gene_ann$VarianceType == "HVG", , drop = FALSE]
)

## Step 1: Normalize and pre-process the data (log normalisation, 32 PCs)
cds <- preprocess_cds(
  cds,
  num_dim = 32,
  method = "PCA",
  norm_method = "log"
)

## Step 2: Remove batch effects with cell alignment (left commented out)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP, computed on the PCA space
cds <- reduce_dimension(
  cds,
  reduction_method = "UMAP",
  preprocess_method = "PCA",
  verbose = FALSE
)

## Step 4: Cluster the cells (Leiden on the UMAP embedding)
cds <- cluster_cells(
  cds,
  reduction_method = "UMAP",
  cluster_method = "leiden",
  verbose = FALSE
)

## Step 5: Learn the principal graph
# Note: for the rest of the code to run, the graph should be fully (or, with
# partitions, partially) connected.
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

# Copy the cluster labels into colData so they can be used for colouring.
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)

rr p

rr ## Step 6: Order cells # root cells ids=get_earliest_principal_node(cds,cluster=c()) cds <- order_cells(cds, root_pr_nodes=ids) #plot_cells(cds,color_cells_by = )

rr colData(cds)\(pseudotime=pseudotime(cds) colData(cds)\)Pseudotime=colData(cds)\(pseudotime/max(colData(cds)\)pseudotime,na.rm = T) df_den=pData(cds)[,c(,_batch)] df_den=as.data.frame(df_den[!is.infinite(df_den$Pseudotime),]) set.seed(10) theme_use=theme(legend.text = element_text(size=16), legend.title = element_text(size=20)) p_carDEC_denoised_hvg_1 = plot_cells(cds,color_cells_by = _batch,,graph_label_size=0,alpha=1,cell_size = 0.6)+ guides(colour = guide_legend(override.aes = list(alpha=0.7, size=5)))+ theme_use+ theme(legend.position = )

p_carDEC_denoised_hvg_2=plot_cells(cds,color_cells_by = ,label_branch_points=T,graph_label_size=2,alpha=1,cell_size = 0.6)+ theme(legend.position = , legend.title = element_text(vjust = 0.2), legend.text = element_text(angle=-50 ), legend.key.height = unit(0.5,), legend.key.width = unit(1,))+ guides(color = guide_colourbar(label.position = ))+theme_use

Cells aren't colored in a way that allows them to be grouped.

rr p_carDEC_denoised_hvg_3=ggplot(data=df_den)+geom_density(aes(x=Pseudotime,fill=dataset_batch),alpha=0.7)+ scale_y_continuous(expand = c(0,0))+ scale_x_continuous(expand = c(0,0))+ theme(legend.position=)+theme_use p_monocle_carDEC_denoised_hvg=egg::ggarrange(p_carDEC_denoised_hvg_1,p_carDEC_denoised_hvg_2,p_carDEC_denoised_hvg_3,ncol=3,draw=F)

rr p_monocle_carDEC_denoised_hvg

rr #cds_exprs=FetchData(obj0,vars = c(3A,100A8)) #df0=data.frame(cbind(pseudotime=pData(cds)\(Pseudotime,cds_exprs)) cds_exprs=as.matrix(SingleCellExperiment::counts(cds)[c(\FCGR3A\,\S100A8\),]) df0=data.frame(cbind(pseudotime=pData(cds)\)Pseudotime,log1p(t(cds_exprs)*mtx_sizefactor))) df0\(UMAP_1=reducedDims(cds)\)UMAP[,1] df0\(UMAP_2=reducedDims(cds)\)UMAP[,2] df0\(BatchID=pData(cds)\)dataset_batch df0=df0[is.finite(df0$pseudotime),] df0=df0[order(df0$pseudotime,decreasing = F),,drop=F] df0\(x=df0\)pseudotime/max(df0\(pseudotime) df_pseudotime_list\)carDEC_denoised_hvg=df0

  • Feature plots of FCGR3A and S100A8

rr p=get_plot4(df00 = df0)

rr p

rr df_den=colData(cds) tt1=ks.test(df_den\(Pseudotime[df_den\)dataset_batch==1],df_den\(Pseudotime[df_den\)dataset_batch==3]) tt1


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == \T1\] and df_den$Pseudotime[df_den$dataset_batch == \T3\]
D = 0.069253, p-value = 1.855e-06
alternative hypothesis: two-sided

# Two-sample KS test on scaled pseudotime: batch T1 vs. T2.
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.066806, p-value = 1.788e-08
alternative hypothesis: two-sided

# Two-sample KS test on scaled pseudotime: batch T2 vs. T3.
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.066796, p-value = 1.196e-06
alternative hypothesis: two-sided

# Row 4 of Stable5: the three KS p-values and D statistics, passed through
# get_p_new() and laid out as a 1 x 3 matrix.
Stable5[4, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)

3.3 All genes denoised

# monocle3 workflow on mtx: build the cell_data_set, run PCA, embed with
# UMAP, cluster with leiden and learn the principal graph.
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

## Step 2: Remove batch effects with cell alignment (commented out)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

# Copy the UMAP cluster labels into the cell metadata for colouring.
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Overview: partitions and clusters side by side.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE, graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)

rr p

rr ## Step 6: Order cells # root cells ids=get_earliest_principal_node(cds,cluster=c()) cds <- order_cells(cds, root_pr_nodes=ids) #plot_cells(cds,color_cells_by = )

# Attach pseudotime to the cell metadata and rescale it to [0, 1].
# pseudotime() can contain Inf (monocle3 uses it for cells that cannot be
# reached from the root). Mask those as NA first; otherwise max() is Inf and
# every scaled value becomes 0 or NaN.
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0
colData(cds)$Pseudotime <- colData(cds)$pseudotime /
  max(colData(cds)$pseudotime, na.rm = TRUE)

# Table for the density panel: only cells with a usable pseudotime.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes applied to the panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_carDEC_denoised_all_1 <- plot_cells(cds, color_cells_by = "dataset_batch", graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by scaled pseudotime.
p_carDEC_denoised_all_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE, graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

Cells aren't colored in a way that allows them to be grouped.

# Panel 3: per-batch density of scaled pseudotime.
p_carDEC_denoised_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels in one row (built but not drawn here).
p_monocle_carDEC_denoised_all <- egg::ggarrange(p_carDEC_denoised_all_1, p_carDEC_denoised_all_2, p_carDEC_denoised_all_3, ncol = 3, draw = FALSE)

p_monocle_carDEC_denoised_all

# Per-cell table: scaled pseudotime, FCGR3A/S100A8 values, UMAP coordinates
# and batch label; stored in df_pseudotime_list.
#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
cds_exprs <- as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ])
# Expression is scaled per cell by mtx_sizefactor and log1p-transformed.
df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, log1p(t(cds_exprs) * mtx_sizefactor)))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch

# Keep cells with finite pseudotime, sort them along it, and add
# x = pseudotime divided by its maximum.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$carDEC_denoised_all <- df0

  • Feature plots of FCGR3A and S100A8

# Build the feature plots from df0 with get_plot4() and display them.
p <- get_plot4(df00 = df0)

p

# Two-sample KS test on scaled pseudotime: batch T1 vs. T3.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.12681, p-value < 2.2e-16
alternative hypothesis: two-sided

# Two-sample KS test on scaled pseudotime: batch T1 vs. T2.
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.069599, p-value = 3.674e-09
alternative hypothesis: two-sided

# Two-sample KS test on scaled pseudotime: batch T2 vs. T3.
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3


    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.064936, p-value = 2.627e-06
alternative hypothesis: two-sided

# Row 5 of Stable5: the three KS p-values and D statistics, passed through
# get_p_new() and laid out as a 1 x 3 matrix.
Stable5[5, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)

4 Monocle3 using scVI

# Load the scVI AnnData result and derive the inputs for monocle3:
# cell metadata, gene annotation, a genes x cells matrix and per-cell
# size factors.
#adata=ad$read_h5ad("../final_processed_results/scVI Results/monocytes_ALL/adata_all.h5ad")
adata <- ad$read_h5ad("../final_processed_results/scVI Results New/monocytes_ALL/adata_all.h5ad")

# Cell metadata; batch_label is recoded to dataset_batch through maprules.
cell.meta.data <- py_to_r(adata$obs)
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  names(maprules),
  maprules
)

# Gene annotation keyed by de-duplicated gene names.
gene_ann0 <- py_to_r(adata$var)
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(gene_ann0)),
  row.names = make.unique(rownames(gene_ann0))
)

# adata$X is transposed so that rows are genes and columns are cells.
mtx <- t(py_to_r(adata$X))
colnames(mtx) <- cell.meta.data$cellname
rownames(mtx) <- rownames(gene_ann)

# Per-cell factor that scales each column total to 1e4.
mtx_sizefactor <- 1e4 / colSums(mtx)

4.1 Using latent

# monocle3 workflow in which the PCA slot is replaced by the "X_latent"
# embedding from the AnnData object, so UMAP is computed on that embedding.
#mtx=mtx[gene_ann$VarianceType=="HVG",]
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

# Overwrite monocle3's PCA with the latent representation (columns named
# PC1, PC2, ... as the PCA slot would be).
tmp0 <- py_to_r(adata$obsm["X_latent"])
colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (commented out)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

# Copy the UMAP cluster labels into the cell metadata for colouring.
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Overview: partitions and clusters side by side.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE, graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
p

## Step 6: Order cells
# Root node: the one returned by get_earliest_principal_node() for cluster "3".
ids <- get_earliest_principal_node(cds, cluster = c("3"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")

# pseudotime() can contain Inf (monocle3 uses it for cells that cannot be
# reached from the root). Mask those as NA before rescaling to [0, 1];
# otherwise max() is Inf and every scaled value becomes 0 or NaN.
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0
colData(cds)$Pseudotime <- colData(cds)$pseudotime /
  max(colData(cds)$pseudotime, na.rm = TRUE)
#saveRDS(cds,file = "cds_scvi.rds")

# Table for the density panel: only cells with a usable pseudotime.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes applied to every panel below.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_scVI_latent_all_1 <- plot_cells(cds, color_cells_by = "dataset_batch", graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by scaled pseudotime.
p_scVI_latent_all_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE, graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of scaled pseudotime.
p_scVI_latent_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels in one row (built with draw = FALSE, then printed).
p_monocle_scVI_latent_all <- egg::ggarrange(p_scVI_latent_all_1, p_scVI_latent_all_2, p_scVI_latent_all_3, ncol = 3, draw = FALSE)
p_monocle_scVI_latent_all

# Per-cell table: scaled pseudotime, FCGR3A/S100A8 values from counts(cds),
# UMAP coordinates and batch label; stored in df_pseudotime_list.
#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# Disabled alternative: size-factor scaling followed by log1p.
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,log1p(t(cds_exprs)*mtx_sizefactor)))
df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, t(cds_exprs)))
df0[["UMAP_1"]] <- reducedDims(cds)$UMAP[, 1]
df0[["UMAP_2"]] <- reducedDims(cds)$UMAP[, 2]
df0[["BatchID"]] <- pData(cds)$dataset_batch

# Keep cells with finite pseudotime, sort them in increasing order, and add
# x = pseudotime divided by its maximum.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime), , drop = FALSE]
df0[["x"]] <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list[["scVI_latent_all"]] <- df0
  • Feature plots of FCGR3A and S100A8
p=get_plot4(df00 = df0)
p

# Two-sample KS test on scaled pseudotime: batch T1 vs. T3.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.44217, p-value < 2.2e-16
alternative hypothesis: two-sided
# Two-sample KS test on scaled pseudotime: batch T1 vs. T2.
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.22787, p-value < 2.2e-16
alternative hypothesis: two-sided
# Two-sample KS test on scaled pseudotime: batch T2 vs. T3.
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.22133, p-value < 2.2e-16
alternative hypothesis: two-sided
# Row 6 of Stable5: the three KS p-values and D statistics, passed through
# get_p_new() and laid out as a 1 x 3 matrix.
Stable5[6, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)

4.2 HVGs denoised

# monocle3 workflow restricted to the genes listed in hvg_genes$genename:
# build the cell_data_set, run PCA, embed with UMAP, cluster, learn the graph.
cds <- new_cell_data_set(
  mtx[rownames(mtx) %in% hvg_genes$genename, ],
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann[gene_ann$gene_short_name %in% hvg_genes$genename, , drop = FALSE]
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

## Step 2: Remove batch effects with cell alignment (commented out)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

# Copy the UMAP cluster labels into the cell metadata for colouring.
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Overview: partitions and clusters side by side.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE, graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
p

## Step 6: Order cells
# Root node: the one returned by get_earliest_principal_node() for cluster "3".
ids <- get_earliest_principal_node(cds, cluster = c("3"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")

# pseudotime() can contain Inf (monocle3 uses it for cells that cannot be
# reached from the root). Mask those as NA before rescaling to [0, 1];
# otherwise max() is Inf and every scaled value becomes 0 or NaN.
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0
colData(cds)$Pseudotime <- colData(cds)$pseudotime /
  max(colData(cds)$pseudotime, na.rm = TRUE)

# Table of scaled pseudotime and batch: only cells with a usable pseudotime.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes applied to the panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_scvi_denoised_hvg_1 <- plot_cells(cds, color_cells_by = "dataset_batch", graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by scaled pseudotime.
p_scvi_denoised_hvg_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE, graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use
Cells aren't colored in a way that allows them to be grouped.
# Panel 3: per-batch density of scaled pseudotime.
p_scvi_denoised_hvg_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels in one row (built with draw = FALSE, then printed).
p_monocle_scvi_denoised_hvg <- egg::ggarrange(
  p_scvi_denoised_hvg_1,
  p_scvi_denoised_hvg_2,
  p_scvi_denoised_hvg_3,
  ncol = 3,
  draw = FALSE
)
p_monocle_scvi_denoised_hvg

# Per-cell table: scaled pseudotime, FCGR3A/S100A8 values from counts(cds),
# UMAP coordinates and batch label; stored in df_pseudotime_list.
#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# Disabled alternative: size-factor scaling followed by log1p.
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,log1p(t(cds_exprs)*mtx_sizefactor)))
df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, t(cds_exprs)))
df0[["UMAP_1"]] <- reducedDims(cds)$UMAP[, 1]
df0[["UMAP_2"]] <- reducedDims(cds)$UMAP[, 2]
df0[["BatchID"]] <- pData(cds)$dataset_batch

# Keep cells with finite pseudotime, sort them in increasing order, and add
# x = pseudotime divided by its maximum.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime), , drop = FALSE]
df0[["x"]] <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list[["scVI_denosied_hvg"]] <- df0
  • Feature plots of FCGR3A and S100A8
p=get_plot4(df00 = df0)
p

# Two-sample KS test on scaled pseudotime: batch T1 vs. T3.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.516, p-value < 2.2e-16
alternative hypothesis: two-sided
# Two-sample KS test on scaled pseudotime: batch T1 vs. T2.
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.3063, p-value < 2.2e-16
alternative hypothesis: two-sided
# Two-sample KS test on scaled pseudotime: batch T2 vs. T3.
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.22798, p-value < 2.2e-16
alternative hypothesis: two-sided
# Row 7 of Stable5: the three KS p-values and D statistics, passed through
# get_p_new() and laid out as a 1 x 3 matrix.
Stable5[7, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)

4.3 All genes denoised

# monocle3 workflow on the full mtx: build the cell_data_set, run PCA,
# embed with UMAP, cluster with leiden and learn the principal graph.
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

## Step 2: Remove batch effects with cell alignment (commented out)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

# Copy the UMAP cluster labels into the cell metadata for colouring.
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Overview: partitions and clusters side by side.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE, graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
p

## Step 6: Order cells
# Root node: the one returned by get_earliest_principal_node() for cluster "3".
ids <- get_earliest_principal_node(cds, cluster = c("3"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")

# Infinite pseudotime values are masked as NA so that max() below is finite
# and the rescaling to [0, 1] is meaningful.
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0

colData(cds)$Pseudotime <- colData(cds)$pseudotime /
  max(colData(cds)$pseudotime, na.rm = TRUE)

# Table for the density panel. Because Inf was turned into NA above, the
# filter has to test is.finite(); !is.infinite() would keep the NA rows.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes applied to every panel below.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_scVI_denoised_all_1 <- plot_cells(cds, color_cells_by = "dataset_batch", graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by scaled pseudotime.
p_scVI_denoised_all_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE, graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of scaled pseudotime.
p_scVI_denoised_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels in one row (built with draw = FALSE, then printed).
p_monocle_scVI_denoised_all <- egg::ggarrange(p_scVI_denoised_all_1, p_scVI_denoised_all_2, p_scVI_denoised_all_3, ncol = 3, draw = FALSE)
p_monocle_scVI_denoised_all

# Per-cell table: scaled pseudotime, FCGR3A/S100A8 values from counts(cds),
# UMAP coordinates and batch label; stored in df_pseudotime_list.
#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# Disabled alternative: size-factor scaling followed by log1p.
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,log1p(t(cds_exprs)*mtx_sizefactor)))
df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, t(cds_exprs)))
df0[["UMAP_1"]] <- reducedDims(cds)$UMAP[, 1]
df0[["UMAP_2"]] <- reducedDims(cds)$UMAP[, 2]
df0[["BatchID"]] <- pData(cds)$dataset_batch

# Keep cells with finite pseudotime, sort them in increasing order, and add
# x = pseudotime divided by its maximum.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime), , drop = FALSE]
df0[["x"]] <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list[["scVI_denosied_all"]] <- df0
  • Feature plots of FCGR3A and S100A8
p=get_plot4(df00 = df0)
p

# Two-sample KS test on scaled pseudotime: batch T1 vs. T3.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.52707, p-value < 2.2e-16
alternative hypothesis: two-sided
# Two-sample KS test on scaled pseudotime: batch T1 vs. T2.
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
p-value will be approximate in the presence of ties
tt2

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.31699, p-value < 2.2e-16
alternative hypothesis: two-sided
# Two-sample KS test on scaled pseudotime: batch T2 vs. T3.
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
p-value will be approximate in the presence of ties
tt3

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.26217, p-value < 2.2e-16
alternative hypothesis: two-sided
# Row 8 of Stable5: the three KS p-values and D statistics, passed through
# get_p_new() and laid out as a 1 x 3 matrix.
Stable5[8, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)

5 Monocle3 using dca+combat

# Load the DCA AnnData result and derive the inputs for monocle3:
# cell metadata, gene annotation, a genes x cells matrix and per-cell
# size factors.
#adata=ad$read_h5ad("../final_processed_results/dca Results/adata_all.h5ad")
adata <- ad$read_h5ad("../final_processed_results/dca Results New/adata_all.h5ad")

# Cell metadata; batch_label is recoded to dataset_batch through maprules.
cell.meta.data <- py_to_r(adata$obs)
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  names(maprules),
  maprules
)

# Gene annotation keyed by de-duplicated gene names.
gene_ann0 <- py_to_r(adata$var)
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(gene_ann0)),
  row.names = make.unique(rownames(gene_ann0))
)

# adata$X is transposed so that rows are genes and columns are cells.
mtx <- t(py_to_r(adata$X))
colnames(mtx) <- cell.meta.data$cellname
rownames(mtx) <- rownames(gene_ann)

# Per-cell factor that scales each column total to 1e4.
mtx_sizefactor <- 1e4 / colSums(mtx)

5.1 Using combated latent from dca

# monocle3 workflow in which the PCA slot is replaced by the "X_dca_latent"
# embedding from the AnnData object, so UMAP is computed on that embedding.
#mtx=mtx[gene_ann$VarianceType=="HVG",]
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log")

# Overwrite monocle3's PCA with the DCA latent representation.
# NOTE(review): the section heading says "combated latent" while the earlier
# inline note on this line read "original dca" -- confirm what X_dca_latent holds.
tmp0 <- py_to_r(adata$obsm["X_dca_latent"])
colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (commented out)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA")

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden")

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

# Copy the UMAP cluster labels into the cell metadata for colouring.
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Overview: partitions and clusters side by side.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE, graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
p

## Step 6: Order cells
# Root node: the one returned by get_earliest_principal_node() for cluster "4".
ids <- get_earliest_principal_node(cds, cluster = c("4"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")

# pseudotime() can contain Inf (monocle3 uses it for cells that cannot be
# reached from the root). Mask those as NA before rescaling to [0, 1];
# otherwise max() is Inf and every scaled value becomes 0 or NaN.
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0
colData(cds)$Pseudotime <- colData(cds)$pseudotime /
  max(colData(cds)$pseudotime, na.rm = TRUE)
#saveRDS(cds,file="cds_dca.rds")

# Table for the density panel: only cells with a usable pseudotime.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes applied to every panel below.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_dca_latent_all_1 <- plot_cells(cds, color_cells_by = "dataset_batch", graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by scaled pseudotime.
p_dca_latent_all_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE, graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of scaled pseudotime.
p_dca_latent_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels in one row (built with draw = FALSE, then printed).
p_monocle_dca_latent_all <- egg::ggarrange(p_dca_latent_all_1, p_dca_latent_all_2, p_dca_latent_all_3, ncol = 3, draw = FALSE)
p_monocle_dca_latent_all

# Per-cell table: scaled pseudotime, FCGR3A/S100A8 values from counts(cds),
# UMAP coordinates and batch label; stored in df_pseudotime_list.
#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# Disabled alternative: size-factor scaling followed by log1p.
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,log1p(t(cds_exprs)*mtx_sizefactor)))
df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, t(cds_exprs)))
df0[["UMAP_1"]] <- reducedDims(cds)$UMAP[, 1]
df0[["UMAP_2"]] <- reducedDims(cds)$UMAP[, 2]
df0[["BatchID"]] <- pData(cds)$dataset_batch

# Keep cells with finite pseudotime, sort them in increasing order, and add
# x = pseudotime divided by its maximum.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime), , drop = FALSE]
df0[["x"]] <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list[["dca_latent_all"]] <- df0
  • Feature plots of FCGR3A and S100A8
p=get_plot4(df00 = df0)
p

# Two-sample KS test on scaled pseudotime: batch T1 vs. T3.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
p-value will be approximate in the presence of ties
tt1

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.12069, p-value < 2.2e-16
alternative hypothesis: two-sided
# Two-sample KS test on scaled pseudotime: batch T1 vs. T2.
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
p-value will be approximate in the presence of ties
tt2

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.088498, p-value = 1.499e-14
alternative hypothesis: two-sided
# Two-sample KS test on scaled pseudotime: batch T2 vs. T3.
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.14218, p-value < 2.2e-16
alternative hypothesis: two-sided
# Row 9 of Stable5: the three KS p-values and D statistics, passed through
# get_p_new() and laid out as a 1 x 3 matrix.
Stable5[9, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)

5.2 HVGs denoised

# monocle3 workflow restricted to the genes listed in hvg_genes$genename,
# with the PCA slot replaced by the "X_pcahvg" embedding from the AnnData
# object before UMAP is computed.
cds <- new_cell_data_set(
  mtx[rownames(mtx) %in% hvg_genes$genename, ],
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann[gene_ann$gene_short_name %in% hvg_genes$genename, , drop = FALSE]
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

# Overwrite monocle3's PCA with the stored "X_pcahvg" embedding.
tmp0 <- py_to_r(adata$obsm["X_pcahvg"])
colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
reducedDims(cds)$PCA <- tmp0


## Step 2: Remove batch effects with cell alignment (commented out)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

# Copy the UMAP cluster labels into the cell metadata for colouring.
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Overview: partitions and clusters side by side.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE, graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
p

## Step 6: Order cells
# Root node(s): returned by get_earliest_principal_node() for clusters
# "1", "3" and "4".
ids <- get_earliest_principal_node(cds, cluster = c("1", "3", "4"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")

# pseudotime() can contain Inf (monocle3 uses it for cells that cannot be
# reached from the root). Mask those as NA before rescaling to [0, 1];
# otherwise max() is Inf and every scaled value becomes 0 or NaN.
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0
colData(cds)$Pseudotime <- colData(cds)$pseudotime /
  max(colData(cds)$pseudotime, na.rm = TRUE)

# Table for the density panel: only cells with a usable pseudotime.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes applied to every panel below.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_dca_denoised_hvg_1 <- plot_cells(cds, color_cells_by = "dataset_batch", graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by scaled pseudotime.
p_dca_denoised_hvg_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE, graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of scaled pseudotime.
p_dca_denoised_hvg_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels in one row (built with draw = FALSE, then printed).
p_monocle_dca_denoised_hvg <- egg::ggarrange(p_dca_denoised_hvg_1, p_dca_denoised_hvg_2, p_dca_denoised_hvg_3, ncol = 3, draw = FALSE)
p_monocle_dca_denoised_hvg

# Per-cell table: scaled pseudotime, FCGR3A/S100A8 values from counts(cds),
# UMAP coordinates and batch label; stored in df_pseudotime_list.
#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, t(cds_exprs)))
df0[["UMAP_1"]] <- reducedDims(cds)$UMAP[, 1]
df0[["UMAP_2"]] <- reducedDims(cds)$UMAP[, 2]
df0[["BatchID"]] <- pData(cds)$dataset_batch

# Keep cells with finite pseudotime, sort them in increasing order, and add
# x = pseudotime divided by its maximum.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime), , drop = FALSE]
df0[["x"]] <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list[["dca_denoised_hvg"]] <- df0
  • Feature plots of FCGR3A and S100A8
p=get_plot4(df00 = df0)
p

# Two-sample KS test on scaled pseudotime: batch T1 vs. T3.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
p-value will be approximate in the presence of ties
tt1

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.48635, p-value < 2.2e-16
alternative hypothesis: two-sided
# Two-sample KS test on scaled pseudotime: batch T1 vs. T2.
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.099731, p-value < 2.2e-16
alternative hypothesis: two-sided
# Two-sample KS test on scaled pseudotime: batch T2 vs. T3.
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
p-value will be approximate in the presence of ties
tt3

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.50313, p-value < 2.2e-16
alternative hypothesis: two-sided
# Row 10 of Stable5: the three KS p-values and D statistics, passed through
# get_p_new() and laid out as a 1 x 3 matrix.
Stable5[10, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)

5.3 All genes denoised

# monocle3 workflow on the full mtx, with the PCA slot replaced by the
# "X_pcaall" embedding from the AnnData object before UMAP is computed.
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

# Overwrite monocle3's PCA with the stored "X_pcaall" embedding. This has to
# happen before reduce_dimension(), which is told to start from the PCA slot
# (preprocess_method = "PCA"); assigning it afterwards would leave the UMAP
# and the UMAP-based clustering unaffected.
tmp0 <- py_to_r(adata$obsm["X_pcaall"])
colnames(tmp0) <- paste0("PC", seq_len(ncol(tmp0)))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (commented out)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

# Copy the UMAP cluster labels into the cell metadata for colouring.
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Overview: partitions and clusters side by side.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE, graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
p

## Step 6: Order cells
# Root nodes are taken from the hand-picked clusters below.
ids <- get_earliest_principal_node(cds, cluster = c("2", "4", "5"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")
colData(cds)$pseudotime <- pseudotime(cds)
# Scale to [0, 1] by the largest FINITE pseudotime. With a plain max() a single
# Inf value (a cell not reachable from any root) would make the divisor Inf and
# turn every finite value into 0, so the is.infinite() filter below could never
# see the unreachable cells.
raw_pseudotime <- colData(cds)$pseudotime
colData(cds)$Pseudotime <- raw_pseudotime / max(raw_pseudotime[is.finite(raw_pseudotime)])
# Data for the per-batch density panel, without unreachable (Inf) cells
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[!is.infinite(df_den$Pseudotime), ])
set.seed(10)

# Legend text sizes shared by the three panels
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch
p_dca_denoised_all_1 <- plot_cells(
  cds,
  color_cells_by = "dataset_batch",
  graph_label_size = 0,
  alpha = 1,
  cell_size = 0.6
) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by normalised pseudotime, branch points labelled
p_dca_denoised_all_2 <- plot_cells(
  cds,
  color_cells_by = "Pseudotime",
  label_branch_points = TRUE,
  graph_label_size = 2,
  alpha = 1,
  cell_size = 0.6
) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of normalised pseudotime
p_dca_denoised_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels in one row
p_monocle_dca_denoised_all <- egg::ggarrange(
  p_dca_denoised_all_1,
  p_dca_denoised_all_2,
  p_dca_denoised_all_3,
  ncol = 3,
  draw = FALSE
)
p_monocle_dca_denoised_all

#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
# Per-cell FCGR3A / S100A8 values taken from the counts slot (genes x cells)
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,log1p(t(cds_exprs)*mtx_sizefactor)))
# One row per cell: pseudotime, the two genes, UMAP coordinates and batch
df0 <- data.frame(
  cbind(pseudotime = pData(cds)$Pseudotime, t(cds_exprs))
)
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Keep cells with finite pseudotime, sort ascending, rescale to a 0-1 axis
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$dca_denoised_all <- df0
  • Feature plots of FCGR3A and S100A8
# Feature plots of FCGR3A and S100A8 (drawn by get_plot4, defined elsewhere)
p <- get_plot4(df00 = df0)
p

# Pairwise two-sample KS tests on normalised pseudotime between batches:
# tt1 = T1 vs T3, tt2 = T1 vs T2, tt3 = T2 vs T3.
df_den <- colData(cds)
tt1 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
p-value will be approximate in the presence of ties
tt1

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.92747, p-value < 2.2e-16
alternative hypothesis: two-sided
tt2 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T2"])
p-value will be approximate in the presence of ties
tt2

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.14265, p-value < 2.2e-16
alternative hypothesis: two-sided
tt3 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T2"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
p-value will be approximate in the presence of ties
tt3

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.87006, p-value < 2.2e-16
alternative hypothesis: two-sided
# Stable5 columns 2:4 are labelled T1 vs T2, T1 vs T3, T2 vs T3, so the tests
# are written as tt2, tt1, tt3 (tt1, tt2, tt3 would swap the first two columns).
Stable5[11, 2:4] <- matrix(
  get_p_new(
    c(tt2$p.value, tt1$p.value, tt3$p.value),
    c(tt2$statistic, tt1$statistic, tt3$statistic)
  ),
  1, 3
)

6 Monocle3 using MNN

# Load the MNN-corrected object and take its `corrected` assay as the matrix
output <- readRDS("../final_processed_results/MNN_corrected_all.rds")
mtx <- output@assays$data$corrected
cell.meta.data <- colData(output)
# Translate batch labels into the dataset_batch names via maprules
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  names(maprules),
  maprules
)
# Gene annotation keyed by de-duplicated row names of the matrix
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(mtx)),
  row.names = make.unique(rownames(mtx))
)

6.0.1 HVGs denoised

#mtx=mtx[gene_ann$VarianceType=="HVG",]
# Restrict to the highly variable genes. One row mask is applied to both the
# matrix and its annotation so the two cannot select different rows: gene_ann
# holds make.unique()-d names, which differ from rownames(mtx) when a gene
# symbol repeats. Row names of the subset are aligned to the annotation.
hvg_rows <- rownames(mtx) %in% hvg_genes$genename
hvg_mtx <- mtx[hvg_rows, , drop = FALSE]
hvg_ann <- gene_ann[hvg_rows, , drop = FALSE]
rownames(hvg_mtx) <- rownames(hvg_ann)
cds <- new_cell_data_set(
  hvg_mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = hvg_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log")
#tmp0=py_to_r(adata$obsm["X_dca_latent"])
#colnames(tmp0)=paste0("PC",1:ncol(tmp0))
#reducedDims(cds)$PCA=tmp0

## Step 2: Remove batch effects with cell alignment (left disabled)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA")

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden")

## Step 5: Learn a graph
# Note that, for the rest of the code to run, the graph should be fully
# (or, per partition, partially) connected
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Partitions and clusters side by side, used to pick the root clusters
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
p

## Step 6: Order cells
# Root nodes are taken from the hand-picked clusters below.
ids <- get_earliest_principal_node(cds, cluster = c("2", "3", "4"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")
colData(cds)$pseudotime <- pseudotime(cds)
# Scale to [0, 1] by the largest FINITE pseudotime. With a plain max() a single
# Inf value (a cell not reachable from any root) would make the divisor Inf and
# turn every finite value into 0, so the is.infinite() filter below could never
# see the unreachable cells.
raw_pseudotime <- colData(cds)$pseudotime
colData(cds)$Pseudotime <- raw_pseudotime / max(raw_pseudotime[is.finite(raw_pseudotime)])
#saveRDS(cds,file="cds_mnn.rds")
# Data for the per-batch density panel, without unreachable (Inf) cells
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[!is.infinite(df_den$Pseudotime), ])
set.seed(10)

# Legend text sizes shared by the three panels
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch
p_mnn_denoised_hvg_1 <- plot_cells(
  cds,
  color_cells_by = "dataset_batch",
  graph_label_size = 0,
  alpha = 1,
  cell_size = 0.6
) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by normalised pseudotime, branch points labelled
p_mnn_denoised_hvg_2 <- plot_cells(
  cds,
  color_cells_by = "Pseudotime",
  label_branch_points = TRUE,
  graph_label_size = 2,
  alpha = 1,
  cell_size = 0.6
) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of normalised pseudotime
p_mnn_denoised_hvg_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels in one row
p_monocle_mnn_denoised_hvg <- egg::ggarrange(
  p_mnn_denoised_hvg_1,
  p_mnn_denoised_hvg_2,
  p_mnn_denoised_hvg_3,
  ncol = 3,
  draw = FALSE
)
p_monocle_mnn_denoised_hvg

#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
# Per-cell FCGR3A / S100A8 values from the MNN-corrected matrix, used as stored.
# No `mtx_sizefactor` is computed for the MNN matrix in this section, so the
# factor previously multiplied in here was a leftover from an earlier method's
# matrix and has been dropped.
# NOTE(review): confirm get_plot4 expects unscaled corrected values here.
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
df0 <- data.frame(
  cbind(pseudotime = pData(cds)$Pseudotime, t(cds_exprs))
)
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Keep cells with finite pseudotime, sort ascending, rescale to a 0-1 axis
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$mnn_denoised_hvg <- df0
  • Feature plots of FCGR3A and S100A8
# Feature plots of FCGR3A and S100A8 (drawn by get_plot4, defined elsewhere)
p <- get_plot4(df00 = df0)
p

# Pairwise two-sample KS tests on normalised pseudotime between batches:
# tt1 = T1 vs T3, tt2 = T1 vs T2, tt3 = T2 vs T3.
df_den <- colData(cds)
tt1 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
p-value will be approximate in the presence of ties
tt1

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.43599, p-value < 2.2e-16
alternative hypothesis: two-sided
tt2 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T2"])
p-value will be approximate in the presence of ties
tt2

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.73449, p-value < 2.2e-16
alternative hypothesis: two-sided
tt3 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T2"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
p-value will be approximate in the presence of ties
tt3

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.86706, p-value < 2.2e-16
alternative hypothesis: two-sided
# Stable5 columns 2:4 are labelled T1 vs T2, T1 vs T3, T2 vs T3, so the tests
# are written as tt2, tt1, tt3 (tt1, tt2, tt3 would swap the first two columns).
Stable5[12, 2:4] <- matrix(
  get_p_new(
    c(tt2$p.value, tt1$p.value, tt3$p.value),
    c(tt2$statistic, tt1$statistic, tt3$statistic)
  ),
  1, 3
)

6.0.2 All genes denoised

# Load the MNN-corrected object and take its `corrected` assay as the matrix
output <- readRDS("../final_processed_results/MNN_corrected_all.rds")
mtx <- output@assays$data$corrected
cell.meta.data <- colData(output)
# Translate batch labels into the dataset_batch names via maprules
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  names(maprules),
  maprules
)
# Gene annotation keyed by de-duplicated row names of the matrix
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(mtx)),
  row.names = make.unique(rownames(mtx))
)
#colnames(mtx)=colData(output)$cellname
#rownames(mtx)=rownames(gene_ann)
#mtx_sizefactor=1e4/colSums(mtx)
#mtx=mtx[gene_ann$VarianceType=="HVG",]
# All genes are used for this run
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log")
#tmp0=py_to_r(adata$obsm["X_dca_latent"])
#colnames(tmp0)=paste0("PC",1:ncol(tmp0))
#reducedDims(cds)$PCA=tmp0

## Step 2: Remove batch effects with cell alignment (left disabled)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA")

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden")

## Step 5: Learn a graph
# Note that, for the rest of the code to run, the graph should be fully
# (or, per partition, partially) connected
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Partitions and clusters side by side, used to pick the root clusters
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
p

## Step 6: Order cells
# Root nodes are taken from the hand-picked clusters below.
ids <- get_earliest_principal_node(cds, cluster = c("6", "3", "5"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")
colData(cds)$pseudotime <- pseudotime(cds)
# Scale to [0, 1] by the largest FINITE pseudotime. With a plain max() a single
# Inf value (a cell not reachable from any root) would make the divisor Inf and
# turn every finite value into 0, so the is.infinite() filter below could never
# see the unreachable cells.
raw_pseudotime <- colData(cds)$pseudotime
colData(cds)$Pseudotime <- raw_pseudotime / max(raw_pseudotime[is.finite(raw_pseudotime)])
# Data for the per-batch density panel, without unreachable (Inf) cells
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[!is.infinite(df_den$Pseudotime), ])
set.seed(10)

# Legend text sizes shared by the three panels
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch
p_mnn_denoised_all_1 <- plot_cells(
  cds,
  color_cells_by = "dataset_batch",
  graph_label_size = 0,
  alpha = 1,
  cell_size = 0.6
) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by normalised pseudotime, branch points labelled
p_mnn_denoised_all_2 <- plot_cells(
  cds,
  color_cells_by = "Pseudotime",
  label_branch_points = TRUE,
  graph_label_size = 2,
  alpha = 1,
  cell_size = 0.6
) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of normalised pseudotime
p_mnn_denoised_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels in one row
p_monocle_mnn_denoised_all <- egg::ggarrange(
  p_mnn_denoised_all_1,
  p_mnn_denoised_all_2,
  p_mnn_denoised_all_3,
  ncol = 3,
  draw = FALSE
)
p_monocle_mnn_denoised_all

#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
# Per-cell FCGR3A / S100A8 values from the MNN-corrected matrix, used as stored.
# The `mtx_sizefactor` assignment for this matrix is commented out in this
# section, so the factor previously multiplied in here was a leftover from an
# earlier method's matrix and has been dropped.
# NOTE(review): confirm get_plot4 expects unscaled corrected values here.
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
df0 <- data.frame(
  cbind(pseudotime = pData(cds)$Pseudotime, t(cds_exprs))
)
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Keep cells with finite pseudotime, sort ascending, rescale to a 0-1 axis
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$mnn_denoised_all <- df0
  • Feature plots of FCGR3A and S100A8
# Feature plots of FCGR3A and S100A8 (drawn by get_plot4, defined elsewhere)
p <- get_plot4(df00 = df0)
p

# Pairwise two-sample KS tests on normalised pseudotime between batches:
# tt1 = T1 vs T3, tt2 = T1 vs T2, tt3 = T2 vs T3.
df_den <- colData(cds)
tt1 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
p-value will be approximate in the presence of ties
tt1

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.37703, p-value < 2.2e-16
alternative hypothesis: two-sided
tt2 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T2"])
p-value will be approximate in the presence of ties
tt2

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.55184, p-value < 2.2e-16
alternative hypothesis: two-sided
tt3 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T2"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
p-value will be approximate in the presence of ties
tt3

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.53852, p-value < 2.2e-16
alternative hypothesis: two-sided
# Stable5 columns 2:4 are labelled T1 vs T2, T1 vs T3, T2 vs T3, so the tests
# are written as tt2, tt1, tt3 (tt1, tt2, tt3 would swap the first two columns).
Stable5[13, 2:4] <- matrix(
  get_p_new(
    c(tt2$p.value, tt1$p.value, tt3$p.value),
    c(tt2$statistic, tt1$statistic, tt3$statistic)
  ),
  1, 3
)

7 Monocle3 using scanorama

# Read the scanorama AnnData file and convert its pieces to R objects
adata <- ad$read_h5ad("../final_processed_results/scanorama Results/adata_ALL.h5ad")#
cell.meta.data <- py_to_r(adata$obs)
# Translate batch labels into the dataset_batch names via maprules
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  names(maprules),
  maprules
)
# Gene annotation keyed by de-duplicated gene names from adata$raw$var
gene_ann0 <- py_to_r(adata$raw$var)
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(gene_ann0)),
  row.names = make.unique(rownames(gene_ann0))
)
# adata$X is transposed so that rows are genes and columns are cells
mtx <- t(py_to_r(adata$X$tocsc()))#adata$raw
colnames(mtx) <- cell.meta.data$cellname
rownames(mtx) <- rownames(gene_ann)
# Per-cell factor that scales each column total to 1e4
mtx_sizefactor <- 1e4 / colSums(mtx)

7.1 Using latent

#mtx=mtx[gene_ann$VarianceType=="HVG",]
# Scanorama latent run: monocle3 object on the full matrix
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 50, method = "PCA", norm_method = "log")

# Replace monocle3's PCA with the scanorama embedding before the UMAP is built
tmp0 <- py_to_r(adata$obsm["X_scanorama"])
colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (left disabled)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA")

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden")

## Step 5: Learn a graph
# Note that, for the rest of the code to run, the graph should be fully
# (or, per partition, partially) connected
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Partitions and clusters side by side, used to pick the root cluster
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
p

## Step 6: Order cells
# Root node is taken from the hand-picked cluster below.
ids <- get_earliest_principal_node(cds, cluster = c("2"))#need to specify
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")
colData(cds)$pseudotime <- pseudotime(cds)
# Scale to [0, 1] by the largest FINITE pseudotime. With a plain max() a single
# Inf value (a cell not reachable from any root) would make the divisor Inf and
# turn every finite value into 0, so the is.infinite() filter below could never
# see the unreachable cells.
raw_pseudotime <- colData(cds)$pseudotime
colData(cds)$Pseudotime <- raw_pseudotime / max(raw_pseudotime[is.finite(raw_pseudotime)])
#saveRDS(cds,file="cds_scanorama.rds")
# Data for the per-batch density panel, without unreachable (Inf) cells
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[!is.infinite(df_den$Pseudotime), ])
set.seed(10)
# Legend text sizes shared by the panels
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch
p_monocle_scanorama_latent_hvg_1 <- plot_cells(
  cds,
  color_cells_by = "dataset_batch",
  graph_label_size = 0,
  alpha = 1,
  cell_size = 0.6
) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by normalised pseudotime, branch points labelled
p_monocle_scanorama_latent_hvg_2 <- plot_cells(
  cds,
  color_cells_by = "Pseudotime",
  label_branch_points = TRUE,
  graph_label_size = 2,
  alpha = 1,
  cell_size = 0.6
) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use
Cells aren't colored in a way that allows them to be grouped.
# Panel 3: per-batch density of normalised pseudotime
p_monocle_scanorama_latent_hvg_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use
# Arrange the three panels in one row
p_monocle_scanorama_latent_hvg <- egg::ggarrange(
  p_monocle_scanorama_latent_hvg_1,
  p_monocle_scanorama_latent_hvg_2,
  p_monocle_scanorama_latent_hvg_3,
  ncol = 3,
  draw = FALSE
)
p_monocle_scanorama_latent_hvg

#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
# Per-cell FCGR3A / S100A8 values taken from the counts slot (genes x cells)
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# One row per cell: pseudotime, the two genes, UMAP coordinates and batch
df0 <- data.frame(
  cbind(pseudotime = pData(cds)$Pseudotime, t(cds_exprs))
)
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Keep cells with finite pseudotime, sort ascending, rescale to a 0-1 axis
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$scanorama_latent_hvg <- df0
  • Feature plots of FCGR3A and S100A8
# Feature plots of FCGR3A and S100A8 (drawn by get_plot4, defined elsewhere)
p <- get_plot4(df00 = df0)
p

# Pairwise two-sample KS tests on normalised pseudotime between batches:
# tt1 = T1 vs T3, tt2 = T1 vs T2, tt3 = T2 vs T3.
df_den <- colData(cds)
tt1 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
p-value will be approximate in the presence of ties
tt1

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.18479, p-value < 2.2e-16
alternative hypothesis: two-sided
tt2 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T2"])
p-value will be approximate in the presence of ties
tt2

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.34817, p-value < 2.2e-16
alternative hypothesis: two-sided
tt3 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T2"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt3

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.19376, p-value < 2.2e-16
alternative hypothesis: two-sided
# Stable5 columns 2:4 are labelled T1 vs T2, T1 vs T3, T2 vs T3, so the tests
# are written as tt2, tt1, tt3 (tt1, tt2, tt3 would swap the first two columns).
Stable5[14, 2:4] <- matrix(
  get_p_new(
    c(tt2$p.value, tt1$p.value, tt3$p.value),
    c(tt2$statistic, tt1$statistic, tt3$statistic)
  ),
  1, 3
)

7.2 HVGs denoised

#cds <- new_cell_data_set(mtx[rownames(mtx)%in%hvg_genes$genename,], cell_metadata = cell.meta.data,gene_metadata =gene_ann[gene_ann$gene_short_name%in%hvg_genes$genename,,drop=F])
# Scanorama HVG run: the object holds the full matrix; the precomputed
# X_hvgpca embedding is substituted for the PCA below.
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)
## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(
  cds,
  num_dim = 50,
  method = "PCA",
  norm_method = "log",
  verbose = FALSE
)
# Replace monocle3's PCA with the stored embedding before the UMAP is built
tmp0 <- py_to_r(adata$obsm["X_hvgpca"])
colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
reducedDims(cds)$PCA <- tmp0
## Step 2: Remove batch effects with cell alignment (left disabled)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)
## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(
  cds,
  reduction_method = "UMAP",
  preprocess_method = "PCA",
  verbose = FALSE
)
## Step 4: Cluster the cells
cds <- cluster_cells(
  cds,
  reduction_method = "UMAP",
  cluster_method = "leiden",
  verbose = FALSE
)
## Step 5: Learn a graph
# Note that, for the rest of the code to run, the graph should be fully
# (or, per partition, partially) connected
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)

  |                                                                                                                          
  |                                                                                                                    |   0%
  |                                                                                                                          
  |====================================================================================================================| 100%
colData(cds)$clusters <- cds@clusters$UMAP$clusters
# Partitions and clusters side by side, used to pick the root cluster
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
p

## Step 6: Order cells
# Root node is taken from the hand-picked cluster below.
ids <- get_earliest_principal_node(cds, cluster = c("2"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")
colData(cds)$pseudotime <- pseudotime(cds)
# Scale to [0, 1] by the largest FINITE pseudotime. With a plain max() a single
# Inf value (a cell not reachable from any root) would make the divisor Inf and
# turn every finite value into 0, so the is.infinite() filter below could never
# see the unreachable cells.
raw_pseudotime <- colData(cds)$pseudotime
colData(cds)$Pseudotime <- raw_pseudotime / max(raw_pseudotime[is.finite(raw_pseudotime)])
# Data for the per-batch density panel, without unreachable (Inf) cells
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[!is.infinite(df_den$Pseudotime), ])
set.seed(10)
# Legend text sizes shared by the panels
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch
p_monocle_scanorama_denoised_hvg_1 <- plot_cells(
  cds,
  color_cells_by = "dataset_batch",
  graph_label_size = 0,
  alpha = 1,
  cell_size = 0.6
) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by normalised pseudotime, branch points labelled
p_monocle_scanorama_denoised_hvg_2 <- plot_cells(
  cds,
  color_cells_by = "Pseudotime",
  label_branch_points = TRUE,
  graph_label_size = 2,
  alpha = 1,
  cell_size = 0.6
) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use
Cells aren't colored in a way that allows them to be grouped.
# Panel 3: per-batch density of normalised pseudotime
p_monocle_scanorama_denoised_hvg_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use
# Arrange the three panels in one row
p_monocle_scanorama_denoised_hvg <- egg::ggarrange(
  p_monocle_scanorama_denoised_hvg_1,
  p_monocle_scanorama_denoised_hvg_2,
  p_monocle_scanorama_denoised_hvg_3,
  ncol = 3,
  draw = FALSE
)
p_monocle_scanorama_denoised_hvg

#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
# Per-cell FCGR3A / S100A8 values taken from the counts slot (genes x cells)
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# Each cell's values are multiplied by that cell's mtx_sizefactor.
# NOTE(review): the "Using latent" scanorama run builds this frame from the
# same matrix without the size factor -- confirm which scaling is intended.
df0 <- data.frame(
  cbind(pseudotime = pData(cds)$Pseudotime, t(cds_exprs) * mtx_sizefactor)
)
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Keep cells with finite pseudotime, sort ascending, rescale to a 0-1 axis
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$scanorama_denoised_hvg <- df0
  • Feature plots of FCGR3A and S100A8
# Feature plots of FCGR3A and S100A8 (drawn by get_plot4, defined elsewhere)
p <- get_plot4(df00 = df0)
p

# Pairwise two-sample KS tests on normalised pseudotime between batches:
# tt1 = T1 vs T3, tt2 = T1 vs T2, tt3 = T2 vs T3.
df_den <- colData(cds)
tt1 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt1

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.18717, p-value < 2.2e-16
alternative hypothesis: two-sided
tt2 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T2"])
tt2

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.30887, p-value < 2.2e-16
alternative hypothesis: two-sided
tt3 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T2"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt3

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.49383, p-value < 2.2e-16
alternative hypothesis: two-sided
# Stable5 columns 2:4 are labelled T1 vs T2, T1 vs T3, T2 vs T3, so the tests
# are written as tt2, tt1, tt3 (tt1, tt2, tt3 would swap the first two columns).
Stable5[15, 2:4] <- matrix(
  get_p_new(
    c(tt2$p.value, tt1$p.value, tt3$p.value),
    c(tt2$statistic, tt1$statistic, tt3$statistic)
  ),
  1, 3
)

7.3 All genes denoised

#cds <- new_cell_data_set(mtx[rownames(mtx)%in%hvg_genes$genename,], cell_metadata = cell.meta.data,gene_metadata =gene_ann[gene_ann$gene_short_name%in%hvg_genes$genename,,drop=F])
# Scanorama all-genes run: the object holds the full matrix; the first 30
# columns of the stored X_pca embedding are substituted for the PCA below.
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)
## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(
  cds,
  num_dim = 30,
  method = "PCA",
  norm_method = "log",
  verbose = FALSE
)

# Replace monocle3's PCA with the stored embedding before the UMAP is built
tmp0 <- py_to_r(adata$obsm["X_pca"])
colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
reducedDims(cds)$PCA <- tmp0[, 1:30]

## Step 2: Remove batch effects with cell alignment (left disabled)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(
  cds,
  reduction_method = "UMAP",
  preprocess_method = "PCA",
  verbose = FALSE
)

## Step 4: Cluster the cells
cds <- cluster_cells(
  cds,
  reduction_method = "UMAP",
  cluster_method = "leiden",
  verbose = FALSE
)

## Step 5: Learn a graph
# Note that, for the rest of the code to run, the graph should be fully
# (or, per partition, partially) connected
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Partitions and clusters side by side, used to pick the root cluster
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
p

## Step 6: Order cells
# Root node is taken from the hand-picked cluster below.
ids <- get_earliest_principal_node(cds, cluster = c("1"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")
colData(cds)$pseudotime <- pseudotime(cds)
# Scale to [0, 1] by the largest FINITE pseudotime. With a plain max() a single
# Inf value (a cell not reachable from any root) would make the divisor Inf and
# turn every finite value into 0, so the is.infinite() filter below could never
# see the unreachable cells.
raw_pseudotime <- colData(cds)$pseudotime
colData(cds)$Pseudotime <- raw_pseudotime / max(raw_pseudotime[is.finite(raw_pseudotime)])
# Data for the per-batch density panel, without unreachable (Inf) cells
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[!is.infinite(df_den$Pseudotime), ])
set.seed(10)
# Legend text sizes shared by the panels
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch
p_monocle_scanorama_denoised_all_1 <- plot_cells(
  cds,
  color_cells_by = "dataset_batch",
  graph_label_size = 0,
  alpha = 1,
  cell_size = 0.6
) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by normalised pseudotime, branch points labelled
p_monocle_scanorama_denoised_all_2 <- plot_cells(
  cds,
  color_cells_by = "Pseudotime",
  label_branch_points = TRUE,
  graph_label_size = 2,
  alpha = 1,
  cell_size = 0.6
) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use
Cells aren't colored in a way that allows them to be grouped.
# Panel 3: per-batch density of normalised pseudotime
p_monocle_scanorama_denoised_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use
# Arrange the three panels in one row
p_monocle_scanorama_denoised_all <- egg::ggarrange(
  p_monocle_scanorama_denoised_all_1,
  p_monocle_scanorama_denoised_all_2,
  p_monocle_scanorama_denoised_all_3,
  ncol = 3,
  draw = FALSE
)
p_monocle_scanorama_denoised_all

#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
# Per-cell FCGR3A / S100A8 values taken from the counts slot (genes x cells)
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# Each cell's values are multiplied by that cell's mtx_sizefactor.
# NOTE(review): the "Using latent" scanorama run builds this frame from the
# same matrix without the size factor -- confirm which scaling is intended.
df0 <- data.frame(
  cbind(pseudotime = pData(cds)$Pseudotime, t(cds_exprs) * mtx_sizefactor)
)
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Keep cells with finite pseudotime, sort ascending, rescale to a 0-1 axis
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$scanorama_denoised_all <- df0
  • Feature plots of FCGR3A and S100A8
# Feature plots of FCGR3A and S100A8 (drawn by get_plot4, defined elsewhere)
p <- get_plot4(df00 = df0)
p

# Pairwise two-sample KS tests on normalised pseudotime between batches:
# tt1 = T1 vs T3, tt2 = T1 vs T2, tt3 = T2 vs T3.
df_den <- colData(cds)
tt1 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
p-value will be approximate in the presence of ties
tt1

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.14931, p-value < 2.2e-16
alternative hypothesis: two-sided
tt2 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T2"])
p-value will be approximate in the presence of ties
tt2

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T1"] and df_den$Pseudotime[df_den$dataset_batch == "T2"]
D = 0.39706, p-value < 2.2e-16
alternative hypothesis: two-sided
tt3 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T2"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt3

    Two-sample Kolmogorov-Smirnov test

data:  df_den$Pseudotime[df_den$dataset_batch == "T2"] and df_den$Pseudotime[df_den$dataset_batch == "T3"]
D = 0.29988, p-value < 2.2e-16
alternative hypothesis: two-sided
# Stable5 columns 2:4 are labelled T1 vs T2, T1 vs T3, T2 vs T3, so the tests
# are written as tt2, tt1, tt3 (tt1, tt2, tt3 would swap the first two columns).
Stable5[16, 2:4] <- matrix(
  get_p_new(
    c(tt2$p.value, tt1$p.value, tt3$p.value),
    c(tt2$statistic, tt1$statistic, tt3$statistic)
  ),
  1, 3
)
suppressPackageStartupMessages(library(cowplot))
#Supplementary Table 5
# Export the KS summary table as CSV and XLSX, then render it in the report.
# col.names = NA adds an empty header cell above the row names; without it the
# header line has one field fewer than the data rows and the columns are
# shifted when the CSV is opened in a spreadsheet.
write.table(Stable5, file = "KS_table.csv", sep = ",", col.names = NA)
openxlsx::write.xlsx(Stable5, file = "KS_table.xlsx")
Stable5 %>%
  kable() %>%
  kable_styling()
Method T1.v.s..T2 T1.v.s..T3 T2.v.s..T3
Raw count HVGs Raw count HVGs 0.572 (<2.2e-16) 0.461 (<2.2e-16) 0.709 (<2.2e-16)
Raw count All Raw count All 0.925 (<2.2e-16) 0.684 (<2.2e-16) 0.775 (<2.2e-16)
CarDEC (latent) CarDEC (latent) 0.052 (7.98e-04) 0.034 (1.84e-02) 0.052 (2.92e-04)
CarDEC (denoised HVGs) CarDEC (denoised HVGs) 0.069 (1.85e-06) 0.067 (1.79e-08) 0.067 (1.2e-06)
CarDEC (denoised All) CarDEC (denoised All) 0.127 (<2.2e-16) 0.07 (3.67e-09) 0.065 (2.63e-06)
scVI (latent) scVI (latent) 0.442 (<2.2e-16) 0.228 (<2.2e-16) 0.221 (<2.2e-16)
scVI (denoised HVGs) scVI (denoised HVGs) 0.516 (<2.2e-16) 0.306 (<2.2e-16) 0.228 (<2.2e-16)
scVI (denoised All) scVI (denoised All) 0.527 (<2.2e-16) 0.317 (<2.2e-16) 0.262 (<2.2e-16)
DCA (latent) DCA (latent) 0.121 (<2.2e-16) 0.088 (1.5e-14) 0.142 (<2.2e-16)
DCA (denoised HVGs) DCA (denoised HVGs) 0.486 (<2.2e-16) 0.1 (<2.2e-16) 0.503 (<2.2e-16)
DCA (denoised All) DCA (denoised All) 0.927 (<2.2e-16) 0.143 (<2.2e-16) 0.87 (<2.2e-16)
MNN (denoised HVGs) MNN (denoised HVGs) 0.436 (<2.2e-16) 0.734 (<2.2e-16) 0.867 (<2.2e-16)
MNN (denoised All) MNN (denoised All) 0.377 (<2.2e-16) 0.552 (<2.2e-16) 0.539 (<2.2e-16)
Scanorama (latent) Scanorama (latent) 0.185 (<2.2e-16) 0.348 (<2.2e-16) 0.194 (<2.2e-16)
Scanorama (denoised HVGs) Scanorama (denoised HVGs) 0.187 (<2.2e-16) 0.309 (<2.2e-16) 0.494 (<2.2e-16)
Scanorama (denoised All) Scanorama (denoised All) 0.149 (<2.2e-16) 0.397 (<2.2e-16) 0.3 (<2.2e-16)

8 Figures

8.1 Main Figure (Figure 5)

# Settings for the main figure (denoised-HVG results).
# fig_width / fig_height match the literal sizes passed to ggsave() for this figure.
fig_width <- 30
fig_height <- 25
# Panel letters, one per figure row.
labels <- letters[1:5]
# Method names in the row order of the KS summary table.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)
# Positions in `Methods` of the methods shown, top row first.
use_id <- c(4, 15, 10, 7, 12)
# Place one row of panels on a canvas that is split into five equal horizontal
# bands (row 1 at the top).
#
# plot_id: 1-based row index; the row is drawn with its bottom edge at 1 - plot_id/5.
# plist0:  list of plots handed to egg::ggarrange() and laid out in a single row.
# Returns the value of draw_plot(), to be added to a ggdraw() canvas.
get_draw_plot <- function(plot_id = 1, plist0) {
  n_rows <- 5 # this figure has five rows (the former "12" comment was stale)
  row_panels <- egg::ggarrange(plots = plist0, nrow = 1, draw = FALSE, newpage = FALSE)
  draw_plot(
    row_panels,
    x = 0.02, # the strip left of x = 0.02 is where the labels are drawn
    y = 1 - plot_id / n_rows,
    width = 0.98,
    height = 1 / n_rows - 0.01 # leaves a 0.01 gap between consecutive rows
  )
}
# (x, y) of the panel letter for row `plot_id` of a five-row canvas:
# x = 0 and y sits 1/30 below the top edge of the row's band.
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 30)
}
# (x, y) of the method title for row `plot_id` of a five-row canvas:
# x = 0.015 and y sits 1/60 below the vertical midpoint of the row's band.
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 60)
}
# Concatenate two lists of plots into one unnamed list (x first, then y).
#
# The previous version pre-allocated length(x) + length(y) slots but always
# copied x[1:3] and y[1:2], so it was only correct for exactly 3 + 2 elements;
# other lengths were silently recycled or truncated. Plain concatenation gives
# the same result for 3 + 2 and the expected result for any other lengths.
get_plot_list <- function(x, y) {
  unname(c(x, y))
}
# Main figure (Figure 5): five rows, one per method in `use_id` order
# (denoised-HVG results). Each row shows three pre-built panels followed by
# elements 3 and 4 of get_plot4_sep() for the matching df_pseudotime_list entry.

# ggsave() may fail when the target directory is missing, so create it up front.
dir.create("./revised_figures", showWarnings = FALSE, recursive = TRUE)

# Build the five-panel list for one row: the first panel gets the Set2 colour
# scale, the third the Set2 fill scale; `list_id` is the position of the
# method in df_pseudotime_list.
figure_row_panels <- function(p1, p2, p3, list_id) {
  get_plot_list(
    list(
      p1 + scale_color_brewer(name = "dataset_batch", palette = "Set2"),
      p2,
      p3 + scale_fill_brewer(name = "dataset_batch", palette = "Set2")
    ),
    get_plot4_sep(df_pseudotime_list[[list_id]])[c(3, 4)]
  )
}

p <- ggdraw() +
  get_draw_plot(1, figure_row_panels(p_carDEC_denoised_hvg_1, p_carDEC_denoised_hvg_2, p_carDEC_denoised_hvg_3, 4)) +
  get_draw_plot(2, figure_row_panels(p_monocle_scanorama_denoised_hvg_1, p_monocle_scanorama_denoised_hvg_2, p_monocle_scanorama_denoised_hvg_3, 15)) +
  get_draw_plot(3, figure_row_panels(p_dca_denoised_hvg_1, p_dca_denoised_hvg_2, p_dca_denoised_hvg_3, 10)) +
  get_draw_plot(4, figure_row_panels(p_scvi_denoised_hvg_1, p_scvi_denoised_hvg_2, p_scvi_denoised_hvg_3, 7)) +
  get_draw_plot(5, figure_row_panels(p_mnn_denoised_hvg_1, p_mnn_denoised_hvg_2, p_mnn_denoised_hvg_3, 12))

# Add the panel letter and the rotated method name to the left of every row.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(labels[i], x = label_xy[1], y = label_xy[2], size = 30, color = "black", hjust = 0, vjust = 1) +
    draw_label(paste0(Methods[use_id[i]], collapse = ""),
               x = title_xy[1],
               y = title_xy[2],
               size = 20,
               color = "black", angle = 90, hjust = 0.5, vjust = 0.5)
}
p

ggsave(p, filename = "./revised_figures/CarDEC_monocyte_Main_fig1.pdf", width = 30, height = 25)
ggsave(p, filename = "./revised_figures/CarDEC_monocyte_Main_fig1.tiff", width = 30, height = 25, compression = "lzw")

8.2 Supplementary Figures about monocyte

  1. Raw
# Supplementary figure 1 (raw counts, all genes): a 3 x 3 grid.
# Row a: the three pre-built raw-count panels; rows b and c: the four plots
# returned by get_plot4_sep() for df_pseudotime_list[[2]], two per row, with an
# empty panel filling the third column.

# ggsave() may fail when the target directory is missing, so create it up front.
dir.create("./revised_figures", showWarnings = FALSE, recursive = TRUE)

# Evaluated once and reused for both rows (it was previously called twice with
# the same argument).
raw_all_plots <- get_plot4_sep(df_pseudotime_list[[2]])
bottom_margin <- theme(plot.margin = unit(c(0, 0, 1, 0), "cm"))

plist0 <- vector("list", 9)
plist0[1:3] <- list(
  p_ori_all_1 + scale_color_brewer(name = "dataset_batch", palette = "Set2") + theme_use,
  p_ori_all_2,
  p_ori_all_3 + scale_fill_brewer(name = "dataset_batch", palette = "Set2") + bottom_margin
)
plist0[4:5] <- raw_all_plots[1:2]
plist0[[6]] <- ggplot() + theme_void() + bottom_margin
plist0[7:8] <- raw_all_plots[3:4]
plist0[[9]] <- ggplot() + theme_void()

p <- ggdraw() +
  draw_plot(egg::ggarrange(plots = plist0, ncol = 3, draw = FALSE), x = 0, y = 0, width = 1, height = 1) +
  draw_label("a", x = 0, y = 1 - 0.02, size = 30, color = "black", hjust = 0, vjust = 1) +
  draw_label("b", x = 0, y = 2 / 3 - 0.03, size = 30, color = "black", hjust = 0, vjust = 1) +
  draw_label("c", x = 0, y = 1 / 3 - 0.03, size = 30, color = "black", hjust = 0, vjust = 1)
p

ggsave(p, filename = "./revised_figures/CarDEC_monocyte_Supp_fig1.pdf", width = 18, height = 16)
ggsave(p, filename = "./revised_figures/CarDEC_monocyte_Supp_fig1.tiff", width = 18, height = 16, compression = "lzw")
  1. All genes
# Settings for the supplementary figure built from all-gene denoised results.
# fig_width / fig_height match the literal sizes passed to ggsave() for this figure.
fig_width <- 30
fig_height <- 25
# Panel letters, one per figure row.
labels <- letters[1:5]
# Method names in the row order of the KS summary table.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)
# Positions in `Methods` of the methods shown, top row first.
use_id <- c(5, 16, 11, 8, 13)
# Place one row of panels on a canvas that is split into five equal horizontal
# bands (row 1 at the top).
#
# plot_id: 1-based row index; the row is drawn with its bottom edge at 1 - plot_id/5.
# plist0:  list of plots handed to egg::ggarrange() and laid out in a single row.
# Returns the value of draw_plot(), to be added to a ggdraw() canvas.
get_draw_plot <- function(plot_id = 1, plist0) {
  n_rows <- 5 # this figure has five rows (the former "12" comment was stale)
  row_panels <- egg::ggarrange(plots = plist0, nrow = 1, draw = FALSE, newpage = FALSE)
  draw_plot(
    row_panels,
    x = 0.02, # the strip left of x = 0.02 is where the labels are drawn
    y = 1 - plot_id / n_rows,
    width = 0.98,
    height = 1 / n_rows - 0.01 # leaves a 0.01 gap between consecutive rows
  )
}
# (x, y) of the panel letter for row `plot_id` of a five-row canvas:
# x = 0 and y sits 1/30 below the top edge of the row's band.
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 30)
}
# (x, y) of the method title for row `plot_id` of a five-row canvas:
# x = 0.015 and y sits 1/60 below the vertical midpoint of the row's band.
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 60)
}
# Concatenate two lists of plots into one unnamed list (x first, then y).
#
# The previous version pre-allocated length(x) + length(y) slots but always
# copied x[1:3] and y[1:2], so it was only correct for exactly 3 + 2 elements;
# other lengths were silently recycled or truncated. Plain concatenation gives
# the same result for 3 + 2 and the expected result for any other lengths.
get_plot_list <- function(x, y) {
  unname(c(x, y))
}
# Supplementary figure 2: five rows, one per method in `use_id` order
# (all-gene denoised results). Each row shows three pre-built panels followed by
# elements 3 and 4 of get_plot4_sep() for the matching df_pseudotime_list entry.

# ggsave() may fail when the target directory is missing, so create it up front.
dir.create("./revised_figures", showWarnings = FALSE, recursive = TRUE)

# Build the five-panel list for one row: the first panel gets the Set2 colour
# scale, the third the Set2 fill scale; `list_id` is the position of the
# method in df_pseudotime_list.
figure_row_panels <- function(p1, p2, p3, list_id) {
  get_plot_list(
    list(
      p1 + scale_color_brewer(name = "dataset_batch", palette = "Set2"),
      p2,
      p3 + scale_fill_brewer(name = "dataset_batch", palette = "Set2")
    ),
    get_plot4_sep(df_pseudotime_list[[list_id]])[c(3, 4)]
  )
}

p <- ggdraw() +
  get_draw_plot(1, figure_row_panels(p_carDEC_denoised_all_1, p_carDEC_denoised_all_2, p_carDEC_denoised_all_3, 5)) +
  get_draw_plot(2, figure_row_panels(p_monocle_scanorama_denoised_all_1, p_monocle_scanorama_denoised_all_2, p_monocle_scanorama_denoised_all_3, 16)) +
  get_draw_plot(3, figure_row_panels(p_dca_denoised_all_1, p_dca_denoised_all_2, p_dca_denoised_all_3, 11)) +
  get_draw_plot(4, figure_row_panels(p_scVI_denoised_all_1, p_scVI_denoised_all_2, p_scVI_denoised_all_3, 8)) +
  get_draw_plot(5, figure_row_panels(p_mnn_denoised_all_1, p_mnn_denoised_all_2, p_mnn_denoised_all_3, 13))

# Add the panel letter and the rotated method name to the left of every row.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(labels[i], x = label_xy[1], y = label_xy[2], size = 30, color = "black", hjust = 0, vjust = 1) +
    draw_label(paste0(Methods[use_id[i]], collapse = ""),
               x = title_xy[1],
               y = title_xy[2],
               size = 20,
               color = "black", angle = 90, hjust = 0.5, vjust = 0.5)
}
p

ggsave(p, filename = "./revised_figures/CarDEC_monocyte_Supp_fig2.pdf", width = 30, height = 25, limitsize = FALSE)
ggsave(p, filename = "./revised_figures/CarDEC_monocyte_Supp_fig2.tiff", width = 30, height = 25, limitsize = FALSE, compression = "lzw")
  1. Latent
# Settings for the supplementary figure built from latent representations
# (plus the raw-count HVG result as the last row).
# fig_width / fig_height match the literal sizes passed to ggsave() for this figure.
fig_width <- 30
fig_height <- 25
# Panel letters, one per figure row.
labels <- letters[1:5]
# Method names in the row order of the KS summary table.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)
# Positions in `Methods` of the methods shown, top row first.
use_id <- c(3, 14, 9, 6, 1)
# Place one row of panels on a canvas that is split into five equal horizontal
# bands (row 1 at the top).
#
# plot_id: 1-based row index; the row is drawn with its bottom edge at 1 - plot_id/5.
# plist0:  list of plots handed to egg::ggarrange() and laid out in a single row.
# Returns the value of draw_plot(), to be added to a ggdraw() canvas.
get_draw_plot <- function(plot_id = 1, plist0) {
  n_rows <- 5 # this figure has five rows (the former "12" comment was stale)
  row_panels <- egg::ggarrange(plots = plist0, nrow = 1, draw = FALSE, newpage = FALSE)
  draw_plot(
    row_panels,
    x = 0.02, # the strip left of x = 0.02 is where the labels are drawn
    y = 1 - plot_id / n_rows,
    width = 0.98,
    height = 1 / n_rows - 0.01 # leaves a 0.01 gap between consecutive rows
  )
}
# (x, y) of the panel letter for row `plot_id` of a five-row canvas:
# x = 0 and y sits 1/30 below the top edge of the row's band.
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 30)
}
# (x, y) of the method title for row `plot_id` of a five-row canvas:
# x = 0.015 and y sits 1/60 below the vertical midpoint of the row's band.
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 60)
}
# Concatenate two lists of plots into one unnamed list (x first, then y).
#
# The previous version pre-allocated length(x) + length(y) slots but always
# copied x[1:3] and y[1:2], so it was only correct for exactly 3 + 2 elements;
# other lengths were silently recycled or truncated. Plain concatenation gives
# the same result for 3 + 2 and the expected result for any other lengths.
get_plot_list <- function(x, y) {
  unname(c(x, y))
}
# Supplementary figure 3: five rows, one per method in `use_id` order
# (latent-based results, with the raw-count HVG result last). Each row shows
# three pre-built panels followed by elements 3 and 4 of get_plot4_sep() for
# the matching df_pseudotime_list entry.

# ggsave() may fail when the target directory is missing, so create it up front.
dir.create("./revised_figures", showWarnings = FALSE, recursive = TRUE)

# Build the five-panel list for one row: the first panel gets the Set2 colour
# scale, the third the Set2 fill scale; `list_id` is the position of the
# method in df_pseudotime_list.
figure_row_panels <- function(p1, p2, p3, list_id) {
  get_plot_list(
    list(
      p1 + scale_color_brewer(name = "dataset_batch", palette = "Set2"),
      p2,
      p3 + scale_fill_brewer(name = "dataset_batch", palette = "Set2")
    ),
    get_plot4_sep(df_pseudotime_list[[list_id]])[c(3, 4)]
  )
}

p <- ggdraw() +
  get_draw_plot(1, figure_row_panels(p_carDEC_latent_1, p_carDEC_latent_2, p_carDEC_latent_3, 3)) +
  get_draw_plot(2, figure_row_panels(p_monocle_scanorama_latent_hvg_1, p_monocle_scanorama_latent_hvg_2, p_monocle_scanorama_latent_hvg_3, 14)) +
  get_draw_plot(3, figure_row_panels(p_dca_latent_all_1, p_dca_latent_all_2, p_dca_latent_all_3, 9)) +
  get_draw_plot(4, figure_row_panels(p_scVI_latent_all_1, p_scVI_latent_all_2, p_scVI_latent_all_3, 6)) +
  get_draw_plot(5, figure_row_panels(p_ori_1, p_ori_2, p_ori_3, 1))

# Add the panel letter and the rotated method name to the left of every row.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(labels[i], x = label_xy[1], y = label_xy[2], size = 30, color = "black", hjust = 0, vjust = 1) +
    draw_label(paste0(Methods[use_id[i]], collapse = ""),
               x = title_xy[1],
               y = title_xy[2],
               size = 20,
               color = "black", angle = 90, hjust = 0.5, vjust = 0.5)
}
p

ggsave(p, filename = "./revised_figures/CarDEC_monocyte_Supp_fig3.pdf", width = 30, height = 25, limitsize = FALSE)
ggsave(p, filename = "./revised_figures/CarDEC_monocyte_Supp_fig3.tiff", width = 30, height = 25, limitsize = FALSE, compression = "lzw")

8.3 S100A8 and FCGR3A’s feature plots

  1. monocles’ UMAP of denoised counts from HVGs
# Settings for the feature-plot figure of the denoised-HVG results.
# fig_width / fig_height match the literal sizes passed to ggsave() for this figure.
fig_width <- 15
fig_height <- 25
# Panel letters; only the first length(use_id) entries are drawn.
labels <- letters[1:13]
# Method names in the row order of the KS summary table.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)
# Positions in `Methods` of the methods shown, top row first.
use_id <- c(4, 15, 10, 7, 12)
#use_id=c(3,9,6,1)
# Place one row of panels on a canvas that is split into five equal horizontal
# bands (row 1 at the top).
#
# plot_id: 1-based row index; the row is drawn with its bottom edge at 1 - plot_id/5.
# plist0:  list of plots handed to egg::ggarrange() and laid out in a single row.
# Returns the value of draw_plot(), to be added to a ggdraw() canvas.
get_draw_plot <- function(plot_id = 1, plist0) {
  n_rows <- 5 # this figure has five rows (the former "12" comment was stale)
  row_panels <- egg::ggarrange(plots = plist0, nrow = 1, draw = FALSE, newpage = FALSE)
  draw_plot(
    row_panels,
    x = 0.02, # the strip left of x = 0.02 is where the labels are drawn
    y = 1 - plot_id / n_rows,
    width = 0.98,
    height = 1 / n_rows - 0.01 # leaves a 0.01 gap between consecutive rows
  )
}
# (x, y) of the panel letter for row `plot_id` of a five-row canvas:
# x = 0 and y sits 1/100 below the top edge of the row's band.
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 100)
}
# (x, y) of the method title for row `plot_id` of a five-row canvas:
# x = 0.015 and y sits 1/100 below the vertical midpoint of the row's band.
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 100)
}
# Concatenate two lists of plots into one unnamed list (x first, then y).
#
# The previous version pre-allocated length(x) + length(y) slots but always
# copied x[1:3] and y[1:2], so it was only correct for exactly 3 + 2 elements;
# other lengths were silently recycled or truncated. Plain concatenation gives
# the same result for 3 + 2 and the expected result for any other lengths.
get_plot_list <- function(x, y) {
  unname(c(x, y))
}
# Supplementary figure 4: one row per method in `use_id`, each showing
# elements 1 and 2 of get_plot4_sep() for that method's df_pseudotime_list entry.

# ggsave() may fail when the target directory is missing, so create it up front.
dir.create("./revised_figures", showWarnings = FALSE, recursive = TRUE)

p <- ggdraw()
# First pass: draw the plot rows.
for (i in seq_along(use_id)) {
  row_plots <- get_plot4_sep(df_pseudotime_list[[use_id[i]]])[c(1, 2)]
  p <- p + get_draw_plot(i, row_plots)
}
# Second pass: draw the panel letters and rotated method names on top.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(labels[i], x = label_xy[1], y = label_xy[2], size = 30, color = "black", hjust = 0, vjust = 1) +
    draw_label(paste0(Methods[use_id[i]], collapse = ""),
               x = title_xy[1],
               y = title_xy[2],
               size = 20,
               color = "black", angle = 90, hjust = 0.5, vjust = 0.5)
}
ggsave("./revised_figures/CarDEC_monocyte_Supp_fig4.pdf", p, width = 15, height = 25)
ggsave("./revised_figures/CarDEC_monocyte_Supp_fig4.tiff", p, width = 15, height = 25, compression = "lzw")
p

  1. monocles’ UMAP of denoised counts from All genes
# Settings for the feature-plot figure of the all-gene denoised results.
# fig_width / fig_height match the literal sizes passed to ggsave() for this figure.
fig_width <- 15
fig_height <- 25
# Panel letters; only the first length(use_id) entries are drawn.
labels <- letters[1:13]
# Method names in the row order of the KS summary table.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)
# Positions in `Methods` of the methods shown, top row first.
use_id <- c(5, 16, 11, 8, 13)
# Place one row of panels on a canvas that is split into five equal horizontal
# bands (row 1 at the top).
#
# plot_id: 1-based row index; the row is drawn with its bottom edge at 1 - plot_id/5.
# plist0:  list of plots handed to egg::ggarrange() and laid out in a single row.
# Returns the value of draw_plot(), to be added to a ggdraw() canvas.
get_draw_plot <- function(plot_id = 1, plist0) {
  n_rows <- 5 # this figure has five rows (the former "12" comment was stale)
  row_panels <- egg::ggarrange(plots = plist0, nrow = 1, draw = FALSE, newpage = FALSE)
  draw_plot(
    row_panels,
    x = 0.02, # the strip left of x = 0.02 is where the labels are drawn
    y = 1 - plot_id / n_rows,
    width = 0.98,
    height = 1 / n_rows - 0.01 # leaves a 0.01 gap between consecutive rows
  )
}
# (x, y) of the panel letter for row `plot_id` of a five-row canvas:
# x = 0 and y sits 1/100 below the top edge of the row's band.
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 100)
}
# (x, y) of the method title for row `plot_id` of a five-row canvas:
# x = 0.015 and y sits 1/100 below the vertical midpoint of the row's band.
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 100)
}
# Concatenate two lists of plots into one unnamed list (x first, then y).
#
# The previous version pre-allocated length(x) + length(y) slots but always
# copied x[1:3] and y[1:2], so it was only correct for exactly 3 + 2 elements;
# other lengths were silently recycled or truncated. Plain concatenation gives
# the same result for 3 + 2 and the expected result for any other lengths.
get_plot_list <- function(x, y) {
  unname(c(x, y))
}
# Supplementary figure 5: one row per method in `use_id`, each showing
# elements 1 and 2 of get_plot4_sep() for that method's df_pseudotime_list entry.

# ggsave() may fail when the target directory is missing, so create it up front.
dir.create("./revised_figures", showWarnings = FALSE, recursive = TRUE)

p <- ggdraw()
# First pass: draw the plot rows.
for (i in seq_along(use_id)) {
  row_plots <- get_plot4_sep(df_pseudotime_list[[use_id[i]]])[c(1, 2)]
  p <- p + get_draw_plot(i, row_plots)
}
# Second pass: draw the panel letters and rotated method names on top.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(labels[i], x = label_xy[1], y = label_xy[2], size = 30, color = "black", hjust = 0, vjust = 1) +
    draw_label(paste0(Methods[use_id[i]], collapse = ""),
               x = title_xy[1],
               y = title_xy[2],
               size = 20,
               color = "black", angle = 90, hjust = 0.5, vjust = 0.5)
}
ggsave("./revised_figures/CarDEC_monocyte_Supp_fig5.pdf", p, width = 15, height = 25)
ggsave("./revised_figures/CarDEC_monocyte_Supp_fig5.tiff", p, width = 15, height = 25, compression = "lzw")
p

  1. monocles’ UMAP based on different methods’ latent
# Settings for the feature-plot figure of the latent-based results
# (plus the raw-count HVG result as the last row).
# fig_width / fig_height match the literal sizes passed to ggsave() for this figure.
fig_width <- 15
fig_height <- 25
# Panel letters; only the first length(use_id) entries are drawn.
labels <- letters[1:13]
# Method names in the row order of the KS summary table.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)
# Positions in `Methods` of the methods shown, top row first.
use_id <- c(3, 14, 9, 6, 1)
# Place one row of panels on a canvas that is split into five equal horizontal
# bands (row 1 at the top).
#
# plot_id: 1-based row index; the row is drawn with its bottom edge at 1 - plot_id/5.
# plist0:  list of plots handed to egg::ggarrange() and laid out in a single row.
# Returns the value of draw_plot(), to be added to a ggdraw() canvas.
get_draw_plot <- function(plot_id = 1, plist0) {
  n_rows <- 5 # this figure has five rows (the former "12" comment was stale)
  row_panels <- egg::ggarrange(plots = plist0, nrow = 1, draw = FALSE, newpage = FALSE)
  draw_plot(
    row_panels,
    x = 0.02, # the strip left of x = 0.02 is where the labels are drawn
    y = 1 - plot_id / n_rows,
    width = 0.98,
    height = 1 / n_rows - 0.01 # leaves a 0.01 gap between consecutive rows
  )
}
# (x, y) of the panel letter for row `plot_id` of a five-row canvas:
# x = 0 and y sits 1/100 below the top edge of the row's band.
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 100)
}
# (x, y) of the method title for row `plot_id` of a five-row canvas:
# x = 0.015 and y sits 1/100 below the vertical midpoint of the row's band.
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 100)
}
# Concatenate two lists of plots into one unnamed list (x first, then y).
#
# The previous version pre-allocated length(x) + length(y) slots but always
# copied x[1:3] and y[1:2], so it was only correct for exactly 3 + 2 elements;
# other lengths were silently recycled or truncated. Plain concatenation gives
# the same result for 3 + 2 and the expected result for any other lengths.
get_plot_list <- function(x, y) {
  unname(c(x, y))
}
# Supplementary figure 6: one row per method in `use_id`, each showing
# elements 1 and 2 of get_plot4_sep() for that method's df_pseudotime_list entry.

# ggsave() may fail when the target directory is missing, so create it up front.
dir.create("./revised_figures", showWarnings = FALSE, recursive = TRUE)

p <- ggdraw()
# First pass: draw the plot rows.
for (i in seq_along(use_id)) {
  row_plots <- get_plot4_sep(df_pseudotime_list[[use_id[i]]])[c(1, 2)]
  p <- p + get_draw_plot(i, row_plots)
}
# Second pass: draw the panel letters and rotated method names on top.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(labels[i], x = label_xy[1], y = label_xy[2], size = 30, color = "black", hjust = 0, vjust = 1) +
    draw_label(paste0(Methods[use_id[i]], collapse = ""),
               x = title_xy[1],
               y = title_xy[2],
               size = 20,
               color = "black", angle = 90, hjust = 0.5, vjust = 0.5)
}
ggsave("./revised_figures/CarDEC_monocyte_Supp_fig6.pdf", p, width = 15, height = 25)
ggsave("./revised_figures/CarDEC_monocyte_Supp_fig6.tiff", p, width = 15, height = 25, compression = "lzw")
p

  1. Combined above three figures
# Settings for the combined feature-plot figure (two columns of panels).
# fig_width / fig_height match the literal sizes passed to ggsave() for this figure.
fig_width <- 24
fig_height <- 36
# Panel letters; only the first length(use_id) entries are drawn.
labels <- letters[1:16]
# Method names in the row order of the KS summary table.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)
# Positions in `Methods` of the 15 methods shown (entry 2, "Raw count All", is not included).
use_id <- c(1, 3:5, 14:16, 6:13)
# Place one panel group on a canvas laid out as 2 columns x 8 rows.
# Odd plot_id values go to the left column (x = 0), even ones to the right
# column (x = 0.5075); consecutive pairs share a row, counted from the top.
#
# plot_id: 1-based panel index.
# plist0:  list of plots handed to egg::ggarrange() and laid out in two columns.
# Returns the value of draw_plot(), to be added to a ggdraw() canvas.
get_draw_plot <- function(plot_id = 1, plist0) {
  n_rows <- 8 # grid of 2 columns x 8 rows (the former "12" comment was stale)
  grid_row <- floor((plot_id + 1) / 2)
  panel_pair <- egg::ggarrange(plots = plist0, ncol = 2, draw = FALSE, newpage = FALSE)
  draw_plot(
    panel_pair,
    x = ifelse(plot_id %% 2 == 1, 0, 0.5075),
    y = 1 - grid_row / n_rows,
    width = 1 / 2 - 0.015,
    height = 1 / n_rows - 0.01 # leaves a 0.01 gap between consecutive rows
  )
}
# (x, y) of the panel letter for panel `plot_id` on the 2 x 8 grid:
# x is the left edge of the panel's column (0 for odd ids, 0.5075 for even ids)
# and y sits 1/100 below the top edge of the panel's row.
get_label_pos <- function(plot_id = 1) {
  column_left <- ifelse(plot_id %% 2 == 1, 0, 0.5075)
  grid_row <- floor((plot_id + 1) / 2)
  c(column_left, 1 - grid_row / 8 + 1 / 8 - 1 / 100)
}
# (x, y) of the method title for panel `plot_id` on the 2 x 8 grid:
# x is 0.25 to the right of the panel's column edge (0 for odd ids, 0.5075 for
# even ids) and y sits 1/300 below the top edge of the panel's row.
get_title_pos <- function(plot_id = 1) {
  column_left <- ifelse(plot_id %% 2 == 1, 0, 0.5075)
  grid_row <- floor((plot_id + 1) / 2)
  c(column_left + 0.5 / 2, 1 - grid_row / 8 + 1 / 8 - 1 / 300)
}
# Combined feature-plot figure: one panel group per method in `use_id`, placed
# on the 2 x 8 grid, each showing elements 1 and 2 of get_plot4_sep() for that
# method's df_pseudotime_list entry.

# ggsave() may fail when the target directory is missing, so create it up front.
dir.create("./revised_figures", showWarnings = FALSE, recursive = TRUE)

p <- ggdraw()
# First pass: draw the panel groups.
for (i in seq_along(use_id)) {
  panel_plots <- get_plot4_sep(df_pseudotime_list[[use_id[i]]])[c(1, 2)]
  p <- p + get_draw_plot(i, panel_plots)
}
# Second pass: draw the panel letters and the method names above each group.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(labels[i], x = label_xy[1], y = label_xy[2], size = 18, color = "black", hjust = 0, vjust = 1) +
    draw_label(paste0(Methods[use_id[i]], collapse = ""), x = title_xy[1], y = title_xy[2], size = 25, color = "black", vjust = 1)
}
p

ggsave(p, filename = "./revised_figures/CarDEC_monocyte_Supp_fig456_combined.pdf", width = 24, height = 36)
ggsave(p, filename = "./revised_figures/CarDEC_monocyte_Supp_fig456_combined.tiff", width = 24, height = 36, compression = "lzw")
# Remove the large intermediate objects and trigger garbage collection before
# the workspace is saved.
rm(mtx, output, adata, cds, obj0, raw.data)
gc()
             used    (Mb) gc trigger    (Mb)   max used    (Mb)
Ncells    7706421   411.6   15080422   805.4   15080422   805.4
Vcells 1642530695 12531.6 3505959062 26748.4 3503411665 26729.0
# Snapshot the whole workspace. This image includes the Scanorama results;
# carDEC_monocyte_final.RData does not.
save.image(file="carDEC_monocyte_final_revised.RData")
# NOTE(review): reloading the image that was just written looks redundant within
# a single run; presumably kept so the figure code can be re-run from the saved
# image. Confirm.
load("carDEC_monocyte_final_revised.RData")
# Record the R version and package versions used for this analysis.
sessionInfo()
R version 3.6.1 (2019-07-05)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 18.04.3 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=en_SG.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=en_SG.UTF-8   
 [6] LC_MESSAGES=en_US.UTF-8    LC_PAPER=en_SG.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_SG.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
 [1] grid      splines   stats4    parallel  stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] mgcv_1.8-28                 nlme_3.1-140                monocle3_0.2.1              SingleCellExperiment_1.6.0  SummarizedExperiment_1.14.0
 [6] DelayedArray_0.10.0         BiocParallel_1.20.1         matrixStats_0.56.0          GenomicRanges_1.36.0        GenomeInfoDb_1.20.0        
[11] IRanges_2.18.3              S4Vectors_0.22.1            pheatmap_1.0.12             pbapply_1.4-2               liger_0.4.2                
[16] harmony_1.0                 Rcpp_1.0.6                  RColorBrewer_1.1-2          patchwork_1.1.0.9000        SeuratWrappers_0.1.0       
[21] cowplot_1.0.0               ComplexHeatmap_2.1.2        kableExtra_0.9.0            ggjoy_0.4.1                 ggridges_0.5.1             
[26] tidyr_0.8.3                 dplyr_0.8.5                 monocle_2.9.0               DDRTree_0.1.5               irlba_2.3.3                
[31] VGAM_1.1-2                  ggplot2_3.3.3               Biobase_2.44.0              BiocGenerics_0.32.0         Matrix_1.2-18              
[36] reticulate_1.15            

loaded via a namespace (and not attached):
  [1] rappdirs_0.3.1         RANN.L1_2.5.2          nabor_0.5.0            bit64_0.9-7            knitr_1.23             data.table_1.12.8     
  [7] rpart_4.1-15           RCurl_1.98-1.1         snow_0.4-3             RSQLite_2.1.1          RANN_2.6.1             europepmc_0.3         
 [13] combinat_0.0-8         proxy_0.4-23           future_1.12.0          bit_1.1-15.2           enrichplot_1.2.0       spatstat.data_1.4-3   
 [19] xml2_1.3.0             httpuv_1.5.4           assertthat_0.2.1       viridis_0.5.1          xfun_0.19              hms_0.5.0             
 [25] evaluate_0.14          promises_1.0.1         fansi_0.4.1            progress_1.2.2         caTools_1.18.0         readxl_1.3.1          
 [31] igraph_1.2.5           DBI_1.1.0              htmlwidgets_1.3        sparsesvd_0.2          riverplot_0.6          purrr_0.3.3           
 [37] crosstalk_1.0.0        ggpubr_0.2.1           V8_2.3                 deldir_0.1-25          vctrs_0.2.4            remotes_2.1.1         
 [43] ROCR_1.0-7             abind_1.4-5            withr_2.1.2            ggforce_0.1.3          triebeard_0.3.0        sctransform_0.3.1     
 [49] prettyunits_1.1.1      mclust_5.4.5           goftest_1.2-2          mnormt_1.5-6           cluster_2.1.0          DOSE_3.8.2            
 [55] lazyeval_0.2.2         crayon_1.3.4           pkgconfig_2.0.3        slam_0.1-47            labeling_0.3           units_0.6-5           
 [61] tweenr_1.0.1           rlang_0.4.10           globals_0.12.4         lifecycle_0.2.0        miniUI_0.1.1.1         doSNOW_1.0.18         
 [67] rsvd_1.0.0             cellranger_1.1.0       polyclip_1.10-0        lmtest_0.9-37          urltools_1.7.2         zoo_1.8-7             
 [73] base64enc_0.1-3        GlobalOptions_0.1.1    png_0.1-7              viridisLite_0.3.0      rjson_0.2.20           bitops_1.0-6          
 [79] KernSmooth_2.23-15     blob_1.2.0             shape_1.4.4            pdftools_2.3           stringr_1.4.0          qvalue_2.14.1         
 [85] qpdf_1.1               readr_1.3.1            gridGraphics_0.5-0     ggsignif_0.5.0         scales_1.1.0           memoise_1.1.0         
 [91] magrittr_1.5           plyr_1.8.4             ica_1.0-2              gplots_3.0.3           zlibbioc_1.32.0        gdata_2.18.0          
 [97] compiler_3.6.1         HSMMSingleCell_1.4.0   lsei_1.2-0             clue_0.3-57            fitdistrplus_1.0-14    cli_2.0.2             
[103] XVector_0.24.0         listenv_0.7.0          MASS_7.3-51.4          tidyselect_0.2.5       stringi_1.4.6          densityClust_0.3      
[109] yaml_2.2.0             GOSemSim_2.8.0         askpass_1.1            ggrepel_0.8.1          fastmatch_1.1-0        randomcoloR_1.1.0.1   
[115] tools_3.6.1            future.apply_1.2.0     circlize_0.4.8         rstudioapi_0.11        foreach_1.5.0          gridExtra_2.3         
[121] farver_2.0.3           Rtsne_0.15             ggraph_1.0.2           digest_0.6.25          rvcheck_0.1.3          BiocManager_1.30.10   
[127] FNN_1.1.3              shiny_1.3.2            qlcMatrix_0.9.7        egg_0.4.5              later_1.0.0            RcppAnnoy_0.0.16      
[133] httr_1.4.1             AnnotationDbi_1.48.0   rsconnect_0.8.16       psych_1.9.12.31        npsurv_0.4-0           colorspace_1.4-1      
[139] rvest_0.3.4            tensor_1.5             uwot_0.1.8             spatstat.utils_1.17-0  ggplotify_0.0.3        plotly_4.9.0          
[145] xtable_1.8-4           jsonlite_1.6.1         spatstat_1.64-1        UpSetR_1.3.3           R6_2.4.1               pillar_1.4.2          
[151] htmltools_0.4.0        mime_0.9               glue_1.4.0             clusterProfiler_3.10.1 DT_0.7                 codetools_0.2-16      
[157] fgsea_1.8.0            utf8_1.1.4             lattice_0.20-38        tibble_2.1.3           curl_4.3               leiden_0.3.1          
[163] gtools_3.8.2           magick_2.3             zip_2.0.4              GO.db_3.7.0            openxlsx_4.1.4         survival_2.44-1.1     
[169] limma_3.42.2           rmarkdown_1.11         docopt_0.6.1           fastICA_1.2-2          munsell_0.5.0          GenomeInfoDbData_1.2.2
[175] DO.db_2.9              GetoptLong_0.1.8       iterators_1.0.12       reshape2_1.4.3         gtable_0.3.0           Seurat_3.2.2          
---
title: "The results for monocytes dataset"
subtitle: "raw count matrix, CarDEC, DCA+combat, scVI and scanorama"
author:  Xiangjie Li
date: "`r format(Sys.time(), '%m/%d/%Y')`"
output:
  html_notebook:
    number_sections: yes
    toc: yes
  jekyllthat::jekylldown:
  html_document:
    df_print: paged
    toc: yes
    number_sections: yes
  prettydoc::html_pretty:
    theme: cayman
    highlight: github
    math: katex
    toc: yes
---

<style>
pre {
  max-height: 200px;
  float: left;
  width: 910px;
  overflow-y: auto;
}
pre.r {
  max-height: none;
}
</style>


**Data Summary:** 

This dataset was generated by our group and can be downloaded from [GEO (GSE146974)](https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc=GSE146974) or [https://drive.google.com/file/d/1kR8Hhufoo2h2OtomW8n3kM0gaQhVS564/view?usp=sharing](https://drive.google.com/file/d/1kR8Hhufoo2h2OtomW8n3kM0gaQhVS564/view?usp=sharing). It was generated from human peripheral blood mononuclear cells by Ficoll separation followed by CD14 and CD16 positive cell selection. Since the CD14 and CD16 antibodies are not 100% specific, some T cells were also present in the scRNA-seq data. We performed clustering analysis using the Leiden algorithm for each batch and identified 288 T cells in total based on the T cell marker genes CD3D, CD3E and CD3G. After removing these 288 T cells, 10,878 cells and 21,289 genes were left for the remaining analysis; the cells were processed and sequenced on three different days, resulting in three batches (3,640 cells in T1, 4,833 cells in T2 and 2,405 cells in T3).

__***Human monocyte preparation***__: Monocyte preparation uses a modification of published protocols. Briefly, ~20 ml blood drawn in sodium heparin was processed immediately in the lab in the Clinical Research Center at Columbia University. PBMCs were isolated by gradient Ficoll paque centrifugation, which maintains cell viability and prevents ex vivo activation during cell recovery. Cells were stained with antibodies against human HLADR, CD14 and CD16 and monocyte subsets defined as HLADR+CD14++CD16-(classical), HLADR+CD14++CD16+ (intermediate), HLADR+CD14dim/CD16++ (nonclassical, patrolling monocyte). DAPI staining was used to exclude dead cells. Monocytes were sorted by a BD Influx Sorter into tubes for real-time 10x Genomics analysis.

# Summary 

Here I used monocle3 (monocle3_0.2.1) to conduct the pseudotime analysis. 

> <span style="color:red"> CarDEC, scVI and DCA are all deep-learning-based methods. For each method, we used all genes as the input and ran monocle3 in two ways. One is the standard pipeline for monocle3 (`denoised count` -> `normalization` -> `scaling` -> `pca` dimension reduction -> `umap visualization` based on the `pca` dimension reduction); the other (`Using latent`) replaces the `pca` with the method's latent representation and then performs the umap visualization based on that latent representation. </span>


============================================================================================

- __***HVGs raw***__: Only using highly variable genes (HVG) as the inputs for Monocle3
- __***All genes raw***__: Using all genes as the inputs for Monocle3
- __***Using latent***__: Replacing the `pca` with the latent representation and then performing umap visualization based on the latent representation (CarDEC, DCA+combat, scVI and scanorama).
- __***HVG denoised***__: Only using the denoised expression counts of the highly variable genes (HVGs) as the input for monocle3 and then running the standard pipeline of monocle3 (CarDEC, DCA, scVI and scanorama).
- __***All genes denoised***__: Using all denoised expression values as the input for monocle3 and then running the standard pipeline of monocle3 (CarDEC, DCA, and scVI).

```{r}
# Machine-specific environment setup for this notebook.
# NOTE(review): the absolute /home/xiaoxiang/... paths below only exist on the
# author's machine and must be adapted before running elsewhere.
options(warn=-1) # turn off warning message globally
# Put the R 3.5 user library in front of the default library search path.
.libPaths(c("/home/xiaoxiang/R/x86_64-pc-linux-gnu-library/3.5",.libPaths()))
# Python configuration for reticulate, set before reticulate is loaded.
Sys.setenv(RETICULATE_PYTHON_ENV="/home/xiaoxiang/anaconda3/envs/py36")#="/home/xiaoxiang/.conda/envs/DESCVIR"
Sys.setenv(RETICULATE_PYTHON="/usr/bin/python3")
#RETICULATE_PYTHON="/home/xiaoxiang/anaconda3/bin/python3",
# Detach and unload Seurat when its namespace is loaded.
# NOTE(review): detach() errors if the namespace is loaded but the package is
# not attached. Confirm this cannot happen here.
if ("Seurat" %in% loadedNamespaces()) detach("package:Seurat",unload = T)
# Load the sf shared library directly from the R 3.5 user library.
dyn.load("/home/xiaoxiang/R/x86_64-pc-linux-gnu-library/3.5/sf/libs/sf.so")
#suppressPackageStartupMessages(library(monocle,lib.loc = "/usr/lib/R/monocle_alpha"))# devtools::install_github("")
#devtools::install_github("cole-trapnell-lab/DDRTree", ref="simple-ppt-like",lib="/usr/lib/R/monocle_alpha")
#devtools::install_github("r-spatial/sf") if 
#install.packages("~/Downloads/monocle-release-monocle3_alpha/", repos = NULL,lib = "/usr/lib/R/monocle_alpha")
suppressPackageStartupMessages(library(reticulate))
#suppressPackageStartupMessages(library(devtools))
suppressPackageStartupMessages(library(monocle3))

#suppressPackageStartupMessages(library(flexclust))
#suppressPackageStartupMessages(library(mcclust))
suppressPackageStartupMessages(library(dplyr))
suppressPackageStartupMessages(library(ggjoy))
suppressPackageStartupMessages(library(VGAM))
suppressPackageStartupMessages(library(knitr))
suppressPackageStartupMessages(library(ggplot2))
suppressPackageStartupMessages(library(kableExtra))
suppressPackageStartupMessages(library(cowplot))
#py_install('umap-learn', pip = T, pip_ignore_installed = T)
#import("leiden")
#fig_path="/home/xiaoxiang/Documents/DESC_paper_prepare/DESC_paper_final/formal_revised/figures_sep/"
datadirpath="./"
knitr::opts_chunk$set(echo=T)
```


```{r}
# Collector for the per-method pseudotime data frames built further down.
df_pseudotime_list <- list()
```

```{r,echo=F,include=T,eval=T}
get_p_new <- function(x, x2 = NULL) {
  # Build "<statistic> (<p-value>)" labels.
  #   x  : numeric vector of p-values.
  #   x2 : optional numeric vector of statistics, rounded to 3 decimals; when
  #        NULL the statistic part of the label is left empty.
  # Non-positive p-values are written as "<2.2e-16"; all others are formatted
  # in scientific notation by scales::scientific().
  format_p <- function(p) {
    ifelse(p <= 0, "<2.2e-16", scales::scientific(p, digits = 3))
  }
  p_text <- sapply(x, format_p)
  if (is.null(x2)) {
    stat_text <- rep("", length.out = length(p_text))
  } else {
    stat_text <- round(x2, 3)
  }
  return(paste0(stat_text, " (", p_text, ")"))
}

# Skeleton of the results table: one row per method and one column per pair of
# batches. Every cell starts as "<2.2e-16" and is overwritten row by row as
# each method is analysed below.
num_methods <- 16 # original 13
Stable5 <- data.frame(
  Method = c(
    "Raw count HVGs", "Raw count All",
    "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
    "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
    "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
    "MNN (denoised HVGs)", "MNN (denoised All)",
    "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
  ),
  `T1 v.s. T2` = rep("<2.2e-16", length.out = num_methods),
  `T1 v.s. T3` = rep("<2.2e-16", length.out = num_methods),
  `T2 v.s. T3` = rep("<2.2e-16", length.out = num_methods),
  stringsAsFactors = FALSE
)
rownames(Stable5) <- Stable5$Method
#table5 %>%
#  kable() %>%
#  kable_styling()
```


```{r}
# load necessay function
#source("/media/xiaoxiang/D/DESC_reproducible_file/helpfunc_new.R")
#source("/media/xiaoxiang/D/Upenn_computer_backup/Documents/Human_Heart_Project/heart/Heart_result_updated/helpfunc_new.R")
# Install the global ggplot2 theme used by every figure: black-and-white base,
# white facet strips, no panel/legend background and no grid lines. The theme
# that was active before is kept in `old`.
old <- theme_set(
  theme_bw() +
    theme(
      strip.background = element_rect(fill = "white"),
      panel.background = element_blank(),
      legend.background = element_blank(),
      panel.grid = element_blank()
    )
)

BatchKL <- function(df, dimensionData = NULL, replicates = 200, n_neighbors = 100,
                    n_cells = 100, batch = "BatchID") {
  # Bootstrap estimate of batch mixing, measured as the KL divergence between
  # the batch composition around a cell and the overall batch composition.
  #
  #   df            : data frame with one row per cell; must contain the column
  #                   named by `batch` and, when `dimensionData` is NULL, the
  #                   columns "tSNE_1" and "tSNE_2".
  #   dimensionData : optional embedding (one row per cell) used instead of the
  #                   tSNE columns of `df`.
  #   replicates    : number of bootstrap rounds.
  #   n_neighbors   : upper bound on the neighbourhood size (see `k` below).
  #   n_cells       : number of cells sampled without replacement per round.
  #   batch         : name of the batch column in `df`.
  #
  # Returns a numeric vector of length `replicates`; each entry is the mean
  # divergence over the cells sampled in that round. `replicates = 0` yields
  # numeric(0).
  if (is.null(dimensionData)) {
    embedding <- as.matrix(df[, c("tSNE_1", "tSNE_2")])
  } else {
    embedding <- as.matrix(dimensionData)
  }
  batch_labels <- factor(as.vector(df[, batch]))
  batch_counts <- as.matrix(table(batch_labels))[, 1]
  global_prop <- batch_counts / sum(batch_counts) # overall batch proportions
  n <- dim(df)[1]
  # The neighbourhood size is capped at five times the number of batches.
  k <- min(5 * length(global_prop), n_neighbors)

  # seq_len() (not 1:replicates) so that zero replicates means zero rounds, and
  # vapply() so the result is always a plain numeric vector.
  vapply(seq_len(replicates), function(round_i) {
    picked <- sample.int(n, n_cells)
    # drop = FALSE keeps the query a matrix even when a single cell is sampled.
    nearest <- nabor::knn(embedding, embedding[picked, , drop = FALSE], k = k)
    kl_per_cell <- vapply(seq_along(picked), function(j) {
      neighbour_ids <- nearest$nn.idx[j, ]
      # batch_labels is a factor, so the table covers every batch in the same
      # order as global_prop, including batches absent from the neighbourhood.
      local_counts <- as.matrix(table(batch_labels[neighbour_ids]))[, 1]
      local_prop <- local_counts / sum(local_counts)
      # Absent batches give 0 * log2(0) = NaN and are dropped by na.rm.
      sum(local_prop * log2(local_prop / global_prop), na.rm = TRUE)
    }, numeric(1))
    mean(kl_per_cell, na.rm = TRUE)
  }, numeric(1))
}
```

```{r}
Convert_to_seurat3 <- function(adata) {
  # Build a Seurat object from an AnnData object held through reticulate.
  # Attaches Seurat from a site-specific library as a side effect. `adata$X`
  # is transposed and converted to CSC before being brought into R; the result
  # is labelled with the `var` index as row names and `obs$cellname` as column
  # names. The obs columns "n_genes" and "n_counts" are left out of the
  # metadata.
  suppressPackageStartupMessages(library("Seurat", lib.loc = "/usr/lib/R/self_library/"))
  counts <- py_to_r(adata$X$T$tocsc())
  obs_df <- py_to_r(adata$obs)
  var_df <- py_to_r(adata$var)
  rownames(counts) <- rownames(var_df)
  colnames(counts) <- obs_df$cellname
  keep_cols <- !colnames(obs_df) %in% c("n_genes", "n_counts")
  seurat_obj <- CreateSeuratObject(
    counts,
    meta.data = obs_df[, keep_cols, drop = FALSE],
    min.features = 1
  )
  return(seurat_obj)
}
getwd()
```


```{r}
get_plot4 <- function(df00) {
  # One-row, four-panel figure for the marker genes FCGR3A and S100A8, in the
  # order: FCGR3A UMAP, FCGR3A vs pseudotime, S100A8 UMAP, S100A8 vs pseudotime.
  # `df00` must provide the columns UMAP_1, UMAP_2, pseudotime, BatchID, FCGR3A
  # and S100A8. The arranged figure is returned without being drawn.

  # Decorations shared by the two UMAP feature plots.
  umap_style <- function() {
    list(
      scale_color_gradient(low = "grey", high = "red"),
      theme(legend.position = "top"),
      guides(color = guide_colorbar(title.vjust = 0.7))
    )
  }

  # Layers shared by the two trend plots: per-batch points and GAM smooths,
  # plus one black GAM smooth over all cells.
  trend_style <- function() {
    list(
      geom_point(aes(color = BatchID), size = 0.01),
      guides(color = guide_legend(override.aes = list(size = 5))),
      geom_smooth(aes(color = BatchID), method = "gam", formula = y ~ s(x, bs = "cs")),
      geom_smooth(color = "black", method = "gam", formula = y ~ s(x, bs = "cs"), size = 0.5),
      ggtitle(""),
      xlab("Pseudotime"),
      theme(
        legend.position = "top",
        plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
        legend.text = element_text(size = 15, face = "bold"),
        legend.title = element_blank()
      )
    )
  }

  umap_fcgr3a <- ggplot() +
    geom_point(data = df00, aes(x = UMAP_1, y = UMAP_2, color = FCGR3A), size = 0.01) +
    umap_style()

  umap_s100a8 <- ggplot() +
    geom_point(data = df00, aes(x = UMAP_1, y = UMAP_2, color = S100A8), size = 0.01) +
    umap_style()

  # Only the FCGR3A trend panel gets the extra right-hand margin.
  trend_fcgr3a <- ggplot(data = df00, aes(x = pseudotime, y = FCGR3A)) +
    trend_style() +
    theme(plot.margin = unit(c(0, 1, 0, 0), "cm")) +
    scale_color_brewer(palette = "Set2")

  trend_s100a8 <- ggplot(data = df00, aes(x = pseudotime, y = S100A8)) +
    trend_style() +
    scale_color_brewer(palette = "Set2")

  egg::ggarrange(umap_fcgr3a, trend_fcgr3a, umap_s100a8, trend_s100a8, ncol = 4, draw = FALSE)
}

get_plot4_sep <- function(df00) {
  # Same four panels as get_plot4(), but returned as a list of separate plots
  # in the order: FCGR3A UMAP, S100A8 UMAP, FCGR3A vs pseudotime, S100A8 vs
  # pseudotime. `df00` must provide the columns UMAP_1, UMAP_2, pseudotime,
  # BatchID, FCGR3A and S100A8.

  # Decorations shared by the two UMAP feature plots.
  umap_style <- function() {
    list(
      scale_color_gradient(low = "grey", high = "red"),
      theme(legend.position = "top"),
      guides(color = guide_colorbar(title.vjust = 0.7))
    )
  }

  # Layers shared by the two trend plots: per-batch points and GAM smooths,
  # plus one black GAM smooth over all cells.
  trend_style <- function() {
    list(
      geom_point(aes(color = BatchID), size = 0.05),
      guides(color = guide_legend(override.aes = list(size = 5))),
      geom_smooth(aes(color = BatchID), method = "gam", formula = y ~ s(x, bs = "cs")),
      geom_smooth(color = "black", method = "gam", formula = y ~ s(x, bs = "cs"), size = 0.5),
      ggtitle(""),
      xlab("Pseudotime"),
      theme(
        legend.position = "top",
        plot.title = element_text(size = 18, face = "bold", hjust = 0.5),
        legend.text = element_text(size = 15, face = "bold"),
        #plot.margin = unit(c(0,1,0,0),"cm"),
        legend.title = element_blank()
      ),
      scale_color_brewer(palette = "Set2")
    )
  }

  umap_fcgr3a <- ggplot() +
    geom_point(data = df00, aes(x = UMAP_1, y = UMAP_2, color = FCGR3A), size = 0.01) +
    umap_style()

  umap_s100a8 <- ggplot() +
    geom_point(data = df00, aes(x = UMAP_1, y = UMAP_2, color = S100A8), size = 0.01) +
    umap_style()

  trend_fcgr3a <- ggplot(data = df00, aes(x = pseudotime, y = FCGR3A)) + trend_style()
  trend_s100a8 <- ggplot(data = df00, aes(x = pseudotime, y = S100A8)) + trend_style()

  list(umap_fcgr3a, umap_s100a8, trend_fcgr3a, trend_s100a8)
}

```




```{r}
# Read the AnnData file through reticulate, convert it to a Seurat object,
# normalise it, and keep the raw count matrix for monocle3.
ad <- import("anndata", convert = FALSE)
adata <- ad$read_h5ad("../../dca_test.h5ad")
obj0 <- Convert_to_seurat3(adata)
obj0 <- NormalizeData(obj0, verbose = FALSE)
raw.data <- obj0@assays$RNA@counts
```

```{r}
# Lookup from the batch labels stored in the data to the short names T1-T3.
maprules <- c(
  "2017_0801" = "T1",
  "2017_1017" = "T2",
  "2017_1120" = "T3"
)
maprules
```

Here we compared different methods, including `DCA` and `scVI`. 

```{r}
# Gene table written by CarDEC; keep only the rows flagged as HVG.
hvg_genes <- read.table(
  "../final_processed_results/CarDEC_hvg_used.tsv",
  header = TRUE,
  sep = "\t",
  stringsAsFactors = FALSE
)
# which() discards NA comparisons, matching what subset() does.
hvg_genes <- hvg_genes[which(hvg_genes$Variance.Type == "HVG"), , drop = FALSE] # top 2000 genes
```

# Monocle3 for `raw data`
## HVGs raw 

```{r}
# Monocle3 on the raw counts restricted to the HVGs.
cell.meta.data <- obj0@meta.data
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  from = names(maprules),
  to = maprules
)
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(raw.data)),
  row.names = make.unique(rownames(raw.data))
)
#pd <- new("AnnotatedDataFrame",data=cell.meta.data)
#fd <- new("AnnotatedDataFrame",data=gene_ann)
cds <- new_cell_data_set(
  raw.data[rownames(raw.data) %in% hvg_genes$genename, ],
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann[gene_ann$gene_short_name %in% hvg_genes$genename, , drop = FALSE]
)
## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)
## Step 2: Remove batch effects with cell alignment (deliberately skipped)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)
## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)
# Construct the graph
# Note that, for the rest of the code to run, the graph should be fully (or per partition) connected
## Step 5: Learn a graph
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# a helper function to identify the root principal points:
get_earliest_principal_node <- function(cds, cluster = c("1", "5")) {
  # For each requested cluster, return the name of the principal-graph node
  # (UMAP graph) that is the closest vertex for the largest number of that
  # cluster's cells.
  #
  #   cds     : cell_data_set with a learned UMAP principal graph and a
  #             "clusters" column in colData().
  #   cluster : cluster labels to pick one root node each for.
  #
  # Returns a character vector with one node name per entry of `cluster`.
  # Stops with an error when a requested cluster contains no cells, instead of
  # silently returning a list with an empty entry.

  # These lookups do not depend on the cluster, so compute them once.
  closest_vertex <- cds@principal_graph_aux[["UMAP"]]$pr_graph_cell_proj_closest_vertex
  closest_vertex <- as.matrix(closest_vertex[colnames(cds), ])
  node_names <- igraph::V(principal_graph(cds)[["UMAP"]])$name
  cell_clusters <- colData(cds)[, "clusters"]

  root_pr_nodes <- vapply(cluster, function(ii) {
    cell_ids <- which(cell_clusters %in% ii)
    if (length(cell_ids) == 0) {
      stop("No cells found in cluster '", ii, "'; cannot choose a root node.",
           call. = FALSE)
    }
    # Most frequent closest vertex among the cells of this cluster.
    top_vertex <- names(which.max(table(closest_vertex[cell_ids, ])))
    node_names[as.numeric(top_vertex)]
  }, character(1))
  root_pr_nodes
}
# root cells
# One root node per chosen cluster, then compute pseudotime from those roots.
ids <- get_earliest_principal_node(cds, cluster = c("1", "3", "4"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")
```

```{r,fig.height=5,fig.width=18}
# Store the raw pseudotime and a copy rescaled by the largest *finite* value.
# Dividing by max() over all values would turn every entry into 0 or NaN as
# soon as one cell has infinite pseudotime, and the infinite-value filter
# below would then never match.
colData(cds)$pseudotime <- pseudotime(cds)
colData(cds)$Pseudotime <- colData(cds)$pseudotime /
  max(colData(cds)$pseudotime[is.finite(colData(cds)$pseudotime)])
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[!is.infinite(df_den$Pseudotime), ])

set.seed(10)
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: UMAP coloured by batch.
p_ori_1 <- plot_cells(cds, color_cells_by = "dataset_batch", graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top") +
  theme(legend.title = element_blank())

# Panel 2: UMAP coloured by rescaled pseudotime.
p_ori_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE, graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_ori_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

p_monocle_ori <- egg::ggarrange(p_ori_1, p_ori_2, p_ori_3, ncol = 3, draw = FALSE)
```

```{r}
# printed how many cells with no pseudotime
table(as.numeric(is.infinite(pData(cds)[,c("pseudotime")]))) #0 mean normal pseudotime and 1 means infinity.
```

```{r fig.height=5,fig.width=18}
p_monocle_ori
```

```{r}
# Per-cell table of rescaled pseudotime, marker expression (from the Seurat
# object), UMAP coordinates and batch; cells without finite pseudotime are
# dropped and the rest are sorted by pseudotime.
cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
#cds_exprs=as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A","S100A8"),])
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,log1p(t(cds_exprs)/size_factors(cds))))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$raw <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
p=get_plot4(df00 = df0)
```

```{r fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests on rescaled pseudotime.
df_den <- colData(cds)
# tt1: T1 vs T3
tt1 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt1

# tt2: T1 vs T2
tt2 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T2"])
tt2

# tt3: T2 vs T3
tt3 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T2"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt3
```

```{r}
# Stable5 columns 2:4 are, in order, T1 vs T2, T1 vs T3 and T2 vs T3, whereas
# tt1 is the T1 vs T3 test and tt2 is the T1 vs T2 test -- so tt2 goes first.
Stable5[1, 2:4] <- matrix(
  get_p_new(
    c(tt2$p.value, tt1$p.value, tt3$p.value),
    c(tt2$statistic, tt1$statistic, tt3$statistic)
  ),
  nrow = 1, ncol = 3
)
```

## All genes raw 
```{r}
# Monocle3 on the raw counts of all genes.
cell.meta.data <- obj0@meta.data
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  from = names(maprules),
  to = maprules
)
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(raw.data)),
  row.names = make.unique(rownames(raw.data))
)
#pd <- new("AnnotatedDataFrame",data=cell.meta.data)
#fd <- new("AnnotatedDataFrame",data=gene_ann)
cds <- new_cell_data_set(raw.data, cell_metadata = cell.meta.data, gene_metadata = gene_ann)
## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)
## Step 2: Remove batch effects with cell alignment (deliberately skipped)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)
## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)
# Construct the graph
# Note that, for the rest of the code to run, the graph should be fully (or per partition) connected
## Step 5: Learn a graph
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```


```{r,fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# a helper function to identify the root principal points:
get_earliest_principal_node <- function(cds, cluster = c("1", "5")) {
  # For each requested cluster, return the name of the principal-graph node
  # (UMAP graph) that is the closest vertex for the largest number of that
  # cluster's cells.
  #
  #   cds     : cell_data_set with a learned UMAP principal graph and a
  #             "clusters" column in colData().
  #   cluster : cluster labels to pick one root node each for.
  #
  # Returns a character vector with one node name per entry of `cluster`.
  # Stops with an error when a requested cluster contains no cells, instead of
  # silently returning a list with an empty entry.

  # These lookups do not depend on the cluster, so compute them once.
  closest_vertex <- cds@principal_graph_aux[["UMAP"]]$pr_graph_cell_proj_closest_vertex
  closest_vertex <- as.matrix(closest_vertex[colnames(cds), ])
  node_names <- igraph::V(principal_graph(cds)[["UMAP"]])$name
  cell_clusters <- colData(cds)[, "clusters"]

  root_pr_nodes <- vapply(cluster, function(ii) {
    cell_ids <- which(cell_clusters %in% ii)
    if (length(cell_ids) == 0) {
      stop("No cells found in cluster '", ii, "'; cannot choose a root node.",
           call. = FALSE)
    }
    # Most frequent closest vertex among the cells of this cluster.
    top_vertex <- names(which.max(table(closest_vertex[cell_ids, ])))
    node_names[as.numeric(top_vertex)]
  }, character(1))
  root_pr_nodes
}
# root cells
# One root node per chosen cluster, then compute pseudotime from those roots.
ids <- get_earliest_principal_node(cds, cluster = c("4", "5"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")
```

```{r,fig.height=5,fig.width=18}
# Store the raw pseudotime and a copy rescaled by the largest *finite* value.
# Dividing by max() over all values would turn every entry into 0 or NaN as
# soon as one cell has infinite pseudotime, and the infinite-value filter
# below would then never match.
colData(cds)$pseudotime <- pseudotime(cds)
colData(cds)$Pseudotime <- colData(cds)$pseudotime /
  max(colData(cds)$pseudotime[is.finite(colData(cds)$pseudotime)])
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[!is.infinite(df_den$Pseudotime), ])

set.seed(10)
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: UMAP coloured by batch.
p_ori_all_1 <- plot_cells(cds, color_cells_by = "dataset_batch", graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top") +
  theme(legend.title = element_blank())

# Panel 2: UMAP coloured by rescaled pseudotime.
p_ori_all_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE, graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_ori_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

p_monocle_ori_all <- egg::ggarrange(p_ori_all_1, p_ori_all_2, p_ori_all_3, ncol = 3, draw = FALSE)
```

```{r}
# printed how many cells with no pseudotime
table(as.numeric(is.infinite(pData(cds)[,c("pseudotime")]))) #0 mean normal pseudotime and 1 means infinity.
```

```{r fig.height=5,fig.width=18}
p_monocle_ori_all
```

```{r}
# Per-cell table of rescaled pseudotime, marker expression (from the Seurat
# object), UMAP coordinates and batch; cells without finite pseudotime are
# dropped and the rest are sorted by pseudotime.
cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
#cds_exprs=as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A","S100A8"),])
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,log1p(t(cds_exprs)/size_factors(cds))))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$raw_all <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
p=get_plot4(df00 = df0)
```

```{r,fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests on rescaled pseudotime.
df_den <- colData(cds)
# tt1: T1 vs T3
tt1 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt1
# tt2: T1 vs T2
tt2 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T2"])
tt2
# tt3: T2 vs T3
tt3 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T2"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt3
```

```{r}
# Stable5 columns 2:4 are, in order, T1 vs T2, T1 vs T3 and T2 vs T3, whereas
# tt1 is the T1 vs T3 test and tt2 is the T1 vs T2 test -- so tt2 goes first.
Stable5[2, 2:4] <- matrix(
  get_p_new(
    c(tt2$p.value, tt1$p.value, tt3$p.value),
    c(tt2$statistic, tt1$statistic, tt3$statistic)
  ),
  nrow = 1, ncol = 3
)
```

# Monocle3 using carDEC

In this section, we will evaluate the performance of carDEC.

Note that carDEC was run on all genes, and the HVGs were extracted afterwards for evaluation.

```{r}
# AnnData file holding the CarDEC results.
adata <- ad$read_h5ad("../final_processed_results/CarDEC Results/adata_CarDEC.h5ad")
```

```{r}
# Cell metadata, gene annotation and the "denoised counts" layer of the CarDEC
# result. The layer is transposed and then labelled with gene names as rows and
# cell names as columns.
cell.meta.data <- py_to_r(adata$obs)
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  from = names(maprules),
  to = maprules
)
gene_ann0 <- py_to_r(adata$var)
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(gene_ann0)),
  VarianceType = gene_ann0$`Variance Type`,
  row.names = make.unique(rownames(gene_ann0))
)
mtx <- t(py_to_r(adata$layers["denoised counts"]))
colnames(mtx) <- cell.meta.data$cellname
rownames(mtx) <- rownames(gene_ann)
# Per-cell factor that rescales each column of mtx to a total of 1e4.
mtx_sizefactor <- 1e4 / colSums(mtx)
```


## Using latent 

```{r}
# Monocle3 on the CarDEC latent space: the PCA slot is overwritten with the
# CarDEC embedding before the UMAP step.
cds <- new_cell_data_set(mtx, cell_metadata = cell.meta.data, gene_metadata = gene_ann)
## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

tmp0 <- py_to_r(adata$obsm["embedding"])
colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (deliberately skipped)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)
## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

# Construct the graph
# Note that, for the rest of the code to run, the graph should be fully (or per partition) connected
## Step 5: Learn a graph
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r, fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# root cells
# Root node from cluster "3", then compute and display pseudotime.
ids <- get_earliest_principal_node(cds, cluster = c("3"))
cds <- order_cells(cds, root_pr_nodes = ids)
plot_cells(cds, color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Store the raw pseudotime and a copy rescaled by the largest *finite* value.
# Dividing by max() over all values would turn every entry into 0 or NaN as
# soon as one cell has infinite pseudotime, and the infinite-value filter
# below would then never match.
colData(cds)$pseudotime <- pseudotime(cds)
colData(cds)$Pseudotime <- colData(cds)$pseudotime /
  max(colData(cds)$pseudotime[is.finite(colData(cds)$pseudotime)])
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[!is.infinite(df_den$Pseudotime), ])
set.seed(10)

theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: UMAP coloured by batch (the stray empty argument was removed).
p_carDEC_latent_1 <- plot_cells(cds, color_cells_by = "dataset_batch", graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: UMAP coloured by rescaled pseudotime.
p_carDEC_latent_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE, graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_carDEC_latent_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

p_monocle_carDEC_latent <- egg::ggarrange(p_carDEC_latent_1, p_carDEC_latent_2, p_carDEC_latent_3, ncol = 3, draw = FALSE)
```


```{r, fig.height=5,fig.width=18}
p_monocle_carDEC_latent
```

```{r}
# Per-cell table of rescaled pseudotime, marker expression, UMAP coordinates
# and batch. Expression is taken from the counts stored in cds, multiplied by
# the per-cell factor mtx_sizefactor and passed through log1p.
#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
cds_exprs <- as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ])
df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, log1p(t(cds_exprs) * mtx_sizefactor)))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$carDEC_latent <- df0
```


- Feature plots of `FCGR3A` and `S100A8`

```{r}
p=get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests on rescaled pseudotime.
df_den <- colData(cds)
# tt1: T1 vs T3
tt1 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt1
# tt2: T1 vs T2
tt2 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T2"])
tt2
# tt3: T2 vs T3
tt3 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T2"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt3
```

```{r}
# Stable5 columns 2:4 are, in order, T1 vs T2, T1 vs T3 and T2 vs T3, whereas
# tt1 is the T1 vs T3 test and tt2 is the T1 vs T2 test -- so tt2 goes first.
Stable5[3, 2:4] <- matrix(
  get_p_new(
    c(tt2$p.value, tt1$p.value, tt3$p.value),
    c(tt2$statistic, tt1$statistic, tt3$statistic)
  ),
  nrow = 1, ncol = 3
)
```


## HVGs denoised

```{r}
# Monocle3 standard pipeline on the CarDEC denoised counts of the HVGs only.
cds <- new_cell_data_set(
  mtx[gene_ann$VarianceType == "HVG", ],
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann[gene_ann$VarianceType == "HVG", , drop = FALSE]
)
## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log")

## Step 2: Remove batch effects with cell alignment (deliberately skipped)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)
## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

# Construct the graph
# Note that, for the rest of the code to run, the graph should be fully (or per partition) connected
## Step 5: Learn a graph
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# root cells
# Root node from cluster "4", then compute pseudotime from it.
ids <- get_earliest_principal_node(cds, cluster = c("4"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Store the raw pseudotime and a copy rescaled by the largest *finite* value.
# Dividing by max() over all values would turn every entry into 0 or NaN as
# soon as one cell has infinite pseudotime, and the infinite-value filter
# below would then never match.
colData(cds)$pseudotime <- pseudotime(cds)
colData(cds)$Pseudotime <- colData(cds)$pseudotime /
  max(colData(cds)$pseudotime[is.finite(colData(cds)$pseudotime)])
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[!is.infinite(df_den$Pseudotime), ])
set.seed(10)

theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: UMAP coloured by batch (the stray empty argument was removed).
p_carDEC_denoised_hvg_1 <- plot_cells(cds, color_cells_by = "dataset_batch", graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: UMAP coloured by rescaled pseudotime.
p_carDEC_denoised_hvg_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE, graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_carDEC_denoised_hvg_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

p_monocle_carDEC_denoised_hvg <- egg::ggarrange(p_carDEC_denoised_hvg_1, p_carDEC_denoised_hvg_2, p_carDEC_denoised_hvg_3, ncol = 3, draw = FALSE)
```


```{r, fig.height=5,fig.width=18}
p_monocle_carDEC_denoised_hvg
```


```{r}
# Per-cell table of rescaled pseudotime, marker expression, UMAP coordinates
# and batch. Expression is taken from the counts stored in cds, multiplied by
# the per-cell factor mtx_sizefactor and passed through log1p.
#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
cds_exprs <- as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ])
df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, log1p(t(cds_exprs) * mtx_sizefactor)))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$carDEC_denoised_hvg <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
p=get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests on rescaled pseudotime.
df_den <- colData(cds)
# tt1: T1 vs T3
tt1 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt1
# tt2: T1 vs T2
tt2 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T1"], df_den$Pseudotime[df_den$dataset_batch == "T2"])
tt2
# tt3: T2 vs T3
tt3 <- ks.test(df_den$Pseudotime[df_den$dataset_batch == "T2"], df_den$Pseudotime[df_den$dataset_batch == "T3"])
tt3
```

```{r}
# Stable5 columns 2:4 are, in order, T1 vs T2, T1 vs T3 and T2 vs T3, whereas
# tt1 is the T1 vs T3 test and tt2 is the T1 vs T2 test -- so tt2 goes first.
Stable5[4, 2:4] <- matrix(
  get_p_new(
    c(tt2$p.value, tt1$p.value, tt3$p.value),
    c(tt2$statistic, tt1$statistic, tt3$statistic)
  ),
  nrow = 1, ncol = 3
)
```

## All genes denoised

```{r}
# Monocle3 standard pipeline on the CarDEC denoised counts of all genes.
cds <- new_cell_data_set(mtx, cell_metadata = cell.meta.data, gene_metadata = gene_ann)
## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

## Step 2: Remove batch effects with cell alignment (deliberately skipped)
##cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)
## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

# Construct the graph
# Note that, for the rest of the code to run, the graph should be fully (or per partition) connected
## Step 5: Learn a graph
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r, fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# root cells
# Root node from cluster "4", then compute pseudotime from it.
ids <- get_earliest_principal_node(cds, cluster = c("4"))
cds <- order_cells(cds, root_pr_nodes = ids)
#plot_cells(cds,color_cells_by = "pseudotime")
```

```{r,fig.height=5,fig.width=18}
# Store the raw pseudotime and a copy rescaled by the largest *finite* value.
# Dividing by max() over all values would turn every entry into 0 or NaN as
# soon as one cell has infinite pseudotime, and the infinite-value filter
# below would then never match.
colData(cds)$pseudotime <- pseudotime(cds)
colData(cds)$Pseudotime <- colData(cds)$pseudotime /
  max(colData(cds)$pseudotime[is.finite(colData(cds)$pseudotime)])
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[!is.infinite(df_den$Pseudotime), ])
set.seed(10)

theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: UMAP coloured by batch (the stray empty argument was removed).
p_carDEC_denoised_all_1 <- plot_cells(cds, color_cells_by = "dataset_batch", graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: UMAP coloured by rescaled pseudotime.
p_carDEC_denoised_all_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE, graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_carDEC_denoised_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

p_monocle_carDEC_denoised_all <- egg::ggarrange(p_carDEC_denoised_all_1, p_carDEC_denoised_all_2, p_carDEC_denoised_all_3, ncol = 3, draw = FALSE)
```


```{r, fig.height=5,fig.width=18}
p_monocle_carDEC_denoised_all
```

```{r}
# Per-cell table of rescaled pseudotime, marker expression, UMAP coordinates
# and batch. Expression is taken from the counts stored in cds, multiplied by
# the per-cell factor mtx_sizefactor and passed through log1p.
#cds_exprs=FetchData(obj0,vars = c("FCGR3A","S100A8"))
#df0=data.frame(cbind(pseudotime=pData(cds)$Pseudotime,cds_exprs))
cds_exprs <- as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ])
df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, log1p(t(cds_exprs) * mtx_sizefactor)))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$carDEC_denoised_all <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
p=get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests of rescaled pseudotime between
# batches. The subsetting expressions are kept inline so that the `data:` line
# printed by each test is unchanged.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Store the three pairwise KS results (T1-T3, T1-T2, T2-T3) in row 5, columns
# 2-4 of Stable5. get_p_new() is a helper defined elsewhere in this file.
Stable5[5, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)
```

# Monocle3 using scVI

```{r}
# Load the scVI result (AnnData, read through the `ad` Python module handle).
# Previous location, kept for reference:
#   adata <- ad$read_h5ad("../final_processed_results/scVI Results/monocytes_ALL/adata_all.h5ad")
adata <- ad$read_h5ad("../final_processed_results/scVI Results New/monocytes_ALL/adata_all.h5ad")
```

```{r}
# Convert the AnnData pieces into the inputs monocle3 needs.
# Cell metadata, with batch labels recoded through `maprules`.
cell.meta.data <- py_to_r(adata$obs)
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  names(maprules),
  maprules
)

# Gene annotation: de-duplicated gene names serve as both id and short name.
gene_ann0 <- py_to_r(adata$var)
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(gene_ann0)),
  row.names = make.unique(rownames(gene_ann0))
)

# adata$X is transposed so that rows are genes and columns are cells.
mtx <- t(py_to_r(adata$X))
colnames(mtx) <- cell.meta.data$cellname
rownames(mtx) <- rownames(gene_ann)
# Per-cell factor that scales each column total to 1e4.
mtx_sizefactor <- 1e4 / colSums(mtx)
```

## Using latent

```{r}
# Monocle3 trajectory on the scVI output, with the PCA slot replaced by the
# scVI latent representation.
# mtx <- mtx[gene_ann$VarianceType == "HVG", ]
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)
# Overwrite the PCA coordinates with adata$obsm["X_latent"] so that the UMAP
# below is computed from it.
tmp0 <- py_to_r(adata$obsm["X_latent"])
colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (disabled)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Diagnostic panels: partitions and clusters, laid out on a 3-column grid.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE,
                 graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r, fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# Root selection: get_earliest_principal_node() is a helper defined elsewhere in
# this file; here it is asked for the root node(s) associated with cluster "3".
ids <- get_earliest_principal_node(cds, cluster = c("3"))
cds <- order_cells(cds, root_pr_nodes = ids)
# plot_cells(cds, color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Rescale pseudotime to [0, 1] and build the three-panel summary figure.
# pseudotime() can return Inf for cells that are not reachable from the chosen
# root; convert those to NA first, otherwise max() is Inf and the rescaled
# values collapse to 0 (reachable cells) or NaN (unreachable cells).
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0
colData(cds)$Pseudotime <- colData(cds)$pseudotime / max(colData(cds)$pseudotime, na.rm = TRUE)
# saveRDS(cds, file = "cds_scvi.rds")

# Density-panel input: rescaled pseudotime and batch label, finite values only.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes shared by all three panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_scVI_latent_all_1 <- plot_cells(cds, color_cells_by = "dataset_batch",
                                  graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by rescaled pseudotime, branch points labelled.
p_scVI_latent_all_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE,
                                  graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_scVI_latent_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange without drawing; the figure is printed by the following chunk.
p_monocle_scVI_latent_all <- egg::ggarrange(
  p_scVI_latent_all_1, p_scVI_latent_all_2, p_scVI_latent_all_3,
  ncol = 3, draw = FALSE
)
```

```{r, fig.height=5,fig.width=18}
p_monocle_scVI_latent_all
```



```{r}
# Per-cell table for the FCGR3A / S100A8 feature plots: rescaled pseudotime,
# the two marker rows of counts(cds), UMAP coordinates and batch label.
# Earlier FetchData-based variant, kept for reference:
#   cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
#   df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
cds_exprs <- as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ])
# Size-factor / log1p variant, currently disabled:
#   df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, log1p(t(cds_exprs) * mtx_sizefactor)))
df0 <- data.frame(cbind(
  pseudotime = pData(cds)$Pseudotime,
  t(cds_exprs)
))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Drop cells without a finite pseudotime, then sort by increasing pseudotime.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
# x: pseudotime divided by the maximum among the retained cells.
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$scVI_latent_all <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
p=get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests of rescaled pseudotime between
# batches. The subsetting expressions are kept inline so that the `data:` line
# printed by each test is unchanged.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Store the three pairwise KS results (T1-T3, T1-T2, T2-T3) in row 6, columns
# 2-4 of Stable5. get_p_new() is a helper defined elsewhere in this file.
Stable5[6, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)
```


## HVGs denoised


```{r}
# Monocle3 trajectory on the scVI denoised matrix restricted to the genes
# listed in hvg_genes$genename (standard PCA -> UMAP pipeline).
cds <- new_cell_data_set(
  mtx[rownames(mtx) %in% hvg_genes$genename, ],
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann[gene_ann$gene_short_name %in% hvg_genes$genename, , drop = FALSE]
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

## Step 2: Remove batch effects with cell alignment (disabled)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Diagnostic panels: partitions and clusters, laid out on a 3-column grid.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE,
                 graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r, fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# Root selection: get_earliest_principal_node() is a helper defined elsewhere in
# this file; here it is asked for the root node(s) associated with cluster "3".
ids <- get_earliest_principal_node(cds, cluster = c("3"))
cds <- order_cells(cds, root_pr_nodes = ids)
# plot_cells(cds, color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Rescale pseudotime to [0, 1] and build the three-panel summary figure.
# pseudotime() can return Inf for cells that are not reachable from the chosen
# root; convert those to NA first, otherwise max() is Inf and the rescaled
# values collapse to 0 (reachable cells) or NaN (unreachable cells).
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0
colData(cds)$Pseudotime <- colData(cds)$pseudotime / max(colData(cds)$pseudotime, na.rm = TRUE)

# Density-panel input: rescaled pseudotime and batch label, finite values only.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes shared by all three panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_scvi_denoised_hvg_1 <- plot_cells(cds, color_cells_by = "dataset_batch",
                                    graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by rescaled pseudotime, branch points labelled.
p_scvi_denoised_hvg_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE,
                                    graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_scvi_denoised_hvg_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange without drawing; the figure is printed by the following chunk.
p_monocle_scvi_denoised_hvg <- egg::ggarrange(
  p_scvi_denoised_hvg_1, p_scvi_denoised_hvg_2, p_scvi_denoised_hvg_3,
  ncol = 3, draw = FALSE
)
```

```{r, fig.height=5,fig.width=18}
p_monocle_scvi_denoised_hvg
```

```{r}
# Per-cell table for the FCGR3A / S100A8 feature plots: rescaled pseudotime,
# the two marker rows of counts(cds), UMAP coordinates and batch label.
# Earlier FetchData-based variant, kept for reference:
#   cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
#   df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
cds_exprs <- as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ])
# Size-factor / log1p variant, currently disabled:
#   df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, log1p(t(cds_exprs) * mtx_sizefactor)))
df0 <- data.frame(cbind(
  pseudotime = pData(cds)$Pseudotime,
  t(cds_exprs)
))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Drop cells without a finite pseudotime, then sort by increasing pseudotime.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
# x: pseudotime divided by the maximum among the retained cells.
df0$x <- df0$pseudotime / max(df0$pseudotime)
# NOTE(review): the list key is spelled "denosied"; it is kept as-is because
# other code may look the entry up by this exact name.
df_pseudotime_list$scVI_denosied_hvg <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
p=get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests of rescaled pseudotime between
# batches. The subsetting expressions are kept inline so that the `data:` line
# printed by each test is unchanged.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Store the three pairwise KS results (T1-T3, T1-T2, T2-T3) in row 7, columns
# 2-4 of Stable5. get_p_new() is a helper defined elsewhere in this file.
Stable5[7, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)
```

## All genes denoised

```{r}
# Monocle3 trajectory on the full scVI denoised matrix (standard PCA -> UMAP
# pipeline, all genes).
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

## Step 2: Remove batch effects with cell alignment (disabled)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Diagnostic panels: partitions and clusters, laid out on a 3-column grid.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE,
                 graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r, fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# Root selection: get_earliest_principal_node() is a helper defined elsewhere in
# this file; here it is asked for the root node(s) associated with cluster "3".
ids <- get_earliest_principal_node(cds, cluster = c("3"))
cds <- order_cells(cds, root_pr_nodes = ids)
# plot_cells(cds, color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Rescale pseudotime to [0, 1] and build the three-panel summary figure.
# Infinite pseudotime values are mapped to NA before rescaling so that max()
# stays finite.
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0

colData(cds)$Pseudotime <- colData(cds)$pseudotime / max(colData(cds)$pseudotime, na.rm = TRUE)

# Density-panel input: rescaled pseudotime and batch label. Because Inf has
# already been turned into NA, filter with is.finite() so those cells are
# actually dropped here instead of being passed on to geom_density().
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes shared by all three panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_scVI_denoised_all_1 <- plot_cells(cds, color_cells_by = "dataset_batch",
                                    graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by rescaled pseudotime, branch points labelled.
p_scVI_denoised_all_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE,
                                    graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_scVI_denoised_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange without drawing; the figure is printed by the following chunk.
p_monocle_scVI_denoised_all <- egg::ggarrange(
  p_scVI_denoised_all_1, p_scVI_denoised_all_2, p_scVI_denoised_all_3,
  ncol = 3, draw = FALSE
)
```

```{r fig.height=5,fig.width=18}
p_monocle_scVI_denoised_all
```


```{r}
# Per-cell table for the FCGR3A / S100A8 feature plots: rescaled pseudotime,
# the two marker rows of counts(cds), UMAP coordinates and batch label.
# Earlier FetchData-based variant, kept for reference:
#   cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
#   df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
cds_exprs <- as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ])
# Size-factor / log1p variant, currently disabled:
#   df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, log1p(t(cds_exprs) * mtx_sizefactor)))
df0 <- data.frame(cbind(
  pseudotime = pData(cds)$Pseudotime,
  t(cds_exprs)
))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Drop cells without a finite pseudotime, then sort by increasing pseudotime.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
# x: pseudotime divided by the maximum among the retained cells.
df0$x <- df0$pseudotime / max(df0$pseudotime)
# NOTE(review): the list key is spelled "denosied"; it is kept as-is because
# other code may look the entry up by this exact name.
df_pseudotime_list$scVI_denosied_all <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
p=get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests of rescaled pseudotime between
# batches. The subsetting expressions are kept inline so that the `data:` line
# printed by each test is unchanged.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Store the three pairwise KS results (T1-T3, T1-T2, T2-T3) in row 8, columns
# 2-4 of Stable5. get_p_new() is a helper defined elsewhere in this file.
Stable5[8, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)
```

# Monocle3 using dca+combat

```{r}
# Load the DCA result (AnnData, read through the `ad` Python module handle).
# Previous location, kept for reference:
#   adata <- ad$read_h5ad("../final_processed_results/dca Results/adata_all.h5ad")
adata <- ad$read_h5ad("../final_processed_results/dca Results New/adata_all.h5ad")
```

```{r}
# Convert the AnnData pieces into the inputs monocle3 needs.
# Cell metadata, with batch labels recoded through `maprules`.
cell.meta.data <- py_to_r(adata$obs)
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  names(maprules),
  maprules
)

# Gene annotation: de-duplicated gene names serve as both id and short name.
gene_ann0 <- py_to_r(adata$var)
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(gene_ann0)),
  row.names = make.unique(rownames(gene_ann0))
)

# adata$X is transposed so that rows are genes and columns are cells.
mtx <- t(py_to_r(adata$X))
colnames(mtx) <- cell.meta.data$cellname
rownames(mtx) <- rownames(gene_ann)
# Per-cell factor that scales each column total to 1e4.
mtx_sizefactor <- 1e4 / colSums(mtx)
```

## Using combated latent from dca

```{r}
# Monocle3 trajectory on the DCA output, with the PCA slot replaced by the
# latent representation stored in adata$obsm["X_dca_latent"].
# mtx <- mtx[gene_ann$VarianceType == "HVG", ]
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log")

# Overwrite the PCA coordinates so that the UMAP below is computed from the
# DCA latent space (original author's tag: "original dca").
tmp0 <- py_to_r(adata$obsm["X_dca_latent"])
colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (disabled)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA")

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden")

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Diagnostic panels: partitions and clusters, laid out on a 3-column grid.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE,
                 graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# Root selection: get_earliest_principal_node() is a helper defined elsewhere in
# this file; here it is asked for the root node(s) associated with cluster "4".
ids <- get_earliest_principal_node(cds, cluster = c("4"))
cds <- order_cells(cds, root_pr_nodes = ids)
# plot_cells(cds, color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Rescale pseudotime to [0, 1] and build the three-panel summary figure.
# pseudotime() can return Inf for cells that are not reachable from the chosen
# root; convert those to NA first, otherwise max() is Inf and the rescaled
# values collapse to 0 (reachable cells) or NaN (unreachable cells).
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0
colData(cds)$Pseudotime <- colData(cds)$pseudotime / max(colData(cds)$pseudotime, na.rm = TRUE)
# saveRDS(cds, file = "cds_dca.rds")

# Density-panel input: rescaled pseudotime and batch label, finite values only.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes shared by all three panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_dca_latent_all_1 <- plot_cells(cds, color_cells_by = "dataset_batch",
                                 graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by rescaled pseudotime, branch points labelled.
p_dca_latent_all_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE,
                                 graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_dca_latent_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange without drawing; the figure is printed by the following chunk.
p_monocle_dca_latent_all <- egg::ggarrange(
  p_dca_latent_all_1, p_dca_latent_all_2, p_dca_latent_all_3,
  ncol = 3, draw = FALSE
)
```

```{r fig.height=5,fig.width=18}
p_monocle_dca_latent_all
```

```{r}
# Per-cell table for the FCGR3A / S100A8 feature plots: rescaled pseudotime,
# the two marker rows of counts(cds), UMAP coordinates and batch label.
# Earlier FetchData-based variant, kept for reference:
#   cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
#   df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
cds_exprs <- as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ])
# Size-factor / log1p variant, currently disabled:
#   df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, log1p(t(cds_exprs) * mtx_sizefactor)))
df0 <- data.frame(cbind(
  pseudotime = pData(cds)$Pseudotime,
  t(cds_exprs)
))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Drop cells without a finite pseudotime, then sort by increasing pseudotime.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
# x: pseudotime divided by the maximum among the retained cells.
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$dca_latent_all <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
p=get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests of rescaled pseudotime between
# batches. The subsetting expressions are kept inline so that the `data:` line
# printed by each test is unchanged.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Store the three pairwise KS results (T1-T3, T1-T2, T2-T3) in row 9, columns
# 2-4 of Stable5. get_p_new() is a helper defined elsewhere in this file.
Stable5[9, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)
```

## HVGs denoised

```{r}
# Monocle3 trajectory on the DCA matrix restricted to the genes listed in
# hvg_genes$genename, with the PCA slot replaced by adata$obsm["X_pcahvg"].
cds <- new_cell_data_set(
  mtx[rownames(mtx) %in% hvg_genes$genename, ],
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann[gene_ann$gene_short_name %in% hvg_genes$genename, , drop = FALSE]
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

# Overwrite the PCA coordinates with the precomputed ones so that the UMAP
# below is computed from them (original author's tag: "original dca").
tmp0 <- py_to_r(adata$obsm["X_pcahvg"])
colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (disabled)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Diagnostic panels: partitions and clusters, laid out on a 3-column grid.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE,
                 graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# Root selection: get_earliest_principal_node() is a helper defined elsewhere in
# this file; here it is asked for the root node(s) associated with clusters
# "1", "3" and "4".
ids <- get_earliest_principal_node(cds, cluster = c("1", "3", "4"))
cds <- order_cells(cds, root_pr_nodes = ids)
# plot_cells(cds, color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Rescale pseudotime to [0, 1] and build the three-panel summary figure.
# pseudotime() can return Inf for cells that are not reachable from the chosen
# root; convert those to NA first, otherwise max() is Inf and the rescaled
# values collapse to 0 (reachable cells) or NaN (unreachable cells).
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0
colData(cds)$Pseudotime <- colData(cds)$pseudotime / max(colData(cds)$pseudotime, na.rm = TRUE)

# Density-panel input: rescaled pseudotime and batch label, finite values only.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes shared by all three panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_dca_denoised_hvg_1 <- plot_cells(cds, color_cells_by = "dataset_batch",
                                   graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by rescaled pseudotime, branch points labelled.
p_dca_denoised_hvg_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE,
                                   graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_dca_denoised_hvg_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange without drawing; the figure is printed by the following chunk.
p_monocle_dca_denoised_hvg <- egg::ggarrange(
  p_dca_denoised_hvg_1, p_dca_denoised_hvg_2, p_dca_denoised_hvg_3,
  ncol = 3, draw = FALSE
)
```

```{r fig.height=5,fig.width=18}
p_monocle_dca_denoised_hvg
```

```{r}
# Per-cell table for the FCGR3A / S100A8 feature plots: rescaled pseudotime,
# the two marker rows of counts(cds), UMAP coordinates and batch label.
# Earlier FetchData-based variant, kept for reference:
#   cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
#   df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
cds_exprs <- as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ])
df0 <- data.frame(cbind(
  pseudotime = pData(cds)$Pseudotime,
  t(cds_exprs)
))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Drop cells without a finite pseudotime, then sort by increasing pseudotime.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
# x: pseudotime divided by the maximum among the retained cells.
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$dca_denoised_hvg <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
p=get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests of rescaled pseudotime between
# batches. The subsetting expressions are kept inline so that the `data:` line
# printed by each test is unchanged.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Store the three pairwise KS results (T1-T3, T1-T2, T2-T3) in row 10, columns
# 2-4 of Stable5. get_p_new() is a helper defined elsewhere in this file.
Stable5[10, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)
```

## All genes denoised

```{r}
# Monocle3 trajectory on the full DCA matrix (all genes), with the PCA slot
# replaced by the precomputed coordinates in adata$obsm["X_pcaall"].
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log", verbose = FALSE)

# Overwrite the PCA coordinates BEFORE the UMAP step. reduce_dimension() reads
# the PCA slot (preprocess_method = "PCA"), and clustering and graph learning
# run on the UMAP, so assigning the slot after reduce_dimension() would have no
# effect on the trajectory. This matches the order used in the HVG chunk, which
# assigns X_pcahvg before reduce_dimension().
# NOTE(review): X_pcaall is presumably the PCA of the corrected all-gene matrix
# computed upstream - confirm against the Python preprocessing.
tmp0 <- py_to_r(adata$obsm["X_pcaall"])
colnames(tmp0) <- paste0("PC", seq_len(ncol(tmp0)))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (disabled)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA", verbose = FALSE)

## Step 4: Cluster the cells
# NOTE(review): cluster labels depend on the UMAP; re-check the root clusters
# hard-coded in Step 6 against the cluster panel produced below.
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden", verbose = FALSE)

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Diagnostic panels: partitions and clusters, laid out on a 3-column grid.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE,
                 graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r, fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# Root selection: get_earliest_principal_node() is a helper defined elsewhere in
# this file; here it is asked for the root node(s) associated with clusters
# "2", "4" and "5".
ids <- get_earliest_principal_node(cds, cluster = c("2", "4", "5"))
cds <- order_cells(cds, root_pr_nodes = ids)
# plot_cells(cds, color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Rescale pseudotime to [0, 1] and build the three-panel summary figure.
# pseudotime() can return Inf for cells that are not reachable from the chosen
# root; convert those to NA first, otherwise max() is Inf and the rescaled
# values collapse to 0 (reachable cells) or NaN (unreachable cells).
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0
colData(cds)$Pseudotime <- colData(cds)$pseudotime / max(colData(cds)$pseudotime, na.rm = TRUE)

# Density-panel input: rescaled pseudotime and batch label, finite values only.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes shared by all three panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_dca_denoised_all_1 <- plot_cells(cds, color_cells_by = "dataset_batch",
                                   graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by rescaled pseudotime, branch points labelled.
p_dca_denoised_all_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE,
                                   graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_dca_denoised_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange without drawing; the figure is printed by the following chunk.
p_monocle_dca_denoised_all <- egg::ggarrange(
  p_dca_denoised_all_1, p_dca_denoised_all_2, p_dca_denoised_all_3,
  ncol = 3, draw = FALSE
)
```

```{r, fig.height=5,fig.width=18}
p_monocle_dca_denoised_all
```

```{r}
# Per-cell table for the FCGR3A / S100A8 feature plots: rescaled pseudotime,
# the two marker rows of counts(cds), UMAP coordinates and batch label.
# Earlier FetchData-based variant, kept for reference:
#   cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
#   df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
cds_exprs <- as.matrix(SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ])
# Size-factor / log1p variant, currently disabled:
#   df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, log1p(t(cds_exprs) * mtx_sizefactor)))
df0 <- data.frame(cbind(
  pseudotime = pData(cds)$Pseudotime,
  t(cds_exprs)
))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Drop cells without a finite pseudotime, then sort by increasing pseudotime.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
# x: pseudotime divided by the maximum among the retained cells.
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$dca_denoised_all <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
p=get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests of rescaled pseudotime between
# batches. The subsetting expressions are kept inline so that the `data:` line
# printed by each test is unchanged.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Store the three pairwise KS results (T1-T3, T1-T2, T2-T3) in row 11, columns
# 2-4 of Stable5. get_p_new() is a helper defined elsewhere in this file.
Stable5[11, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1, 3
)
```


# Monocle3 using MNN

```{r}
# Load the MNN result and derive the inputs monocle3 needs.
output <- readRDS("../final_processed_results/MNN_corrected_all.rds")
# The "corrected" assay is used as the expression matrix.
mtx <- output@assays$data$corrected
# Cell metadata, with batch labels recoded through `maprules`.
cell.meta.data <- colData(output)
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  names(maprules),
  maprules
)
# Gene annotation: de-duplicated row names serve as both id and short name.
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(mtx)),
  row.names = make.unique(rownames(mtx))
)

```

### HVGs denoised

```{r}
# Monocle3 trajectory on the MNN-corrected matrix restricted to the genes
# listed in hvg_genes$genename (standard PCA -> UMAP pipeline).
# mtx <- mtx[gene_ann$VarianceType == "HVG", ]
cds <- new_cell_data_set(
  mtx[rownames(mtx) %in% hvg_genes$genename, ],
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann[gene_ann$gene_short_name %in% hvg_genes$genename, , drop = FALSE]
)

## Step 1: Normalize and pre-process the data
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log")
# Latent-space override left over from the DCA section, disabled here:
#   tmp0 <- py_to_r(adata$obsm["X_dca_latent"])
#   colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
#   reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (disabled)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)

## Step 3: Reduce the dimensions using UMAP
cds <- reduce_dimension(cds, reduction_method = "UMAP", preprocess_method = "PCA")

## Step 4: Cluster the cells
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden")

## Step 5: Learn a graph
# Note: for the rest of the code to run, the graph should be connected
# (fully, or within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
colData(cds)$clusters <- cds@clusters$UMAP$clusters

# Diagnostic panels: partitions and clusters, laid out on a 3-column grid.
p1 <- plot_cells(cds, color_cells_by = "partition", label_cell_groups = FALSE) +
  theme(legend.position = "top")
p2 <- plot_cells(cds, color_cells_by = "clusters", label_cell_groups = FALSE,
                 graph_label_size = 2, label_leaves = FALSE, label_branch_points = FALSE) +
  theme(legend.position = "top")
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r, fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells
# Root selection: get_earliest_principal_node() is a helper defined elsewhere in
# this file; here it is asked for the root node(s) associated with clusters
# "2", "3" and "4".
ids <- get_earliest_principal_node(cds, cluster = c("2", "3", "4"))
cds <- order_cells(cds, root_pr_nodes = ids)
# plot_cells(cds, color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Rescale pseudotime to [0, 1] and build the three-panel summary figure.
# pseudotime() can return Inf for cells that are not reachable from the chosen
# root; convert those to NA first, otherwise max() is Inf and the rescaled
# values collapse to 0 (reachable cells) or NaN (unreachable cells).
x0 <- pseudotime(cds)
x0[is.infinite(x0)] <- NA
colData(cds)$pseudotime <- x0
colData(cds)$Pseudotime <- colData(cds)$pseudotime / max(colData(cds)$pseudotime, na.rm = TRUE)
# saveRDS(cds, file = "cds_mnn.rds")

# Density-panel input: rescaled pseudotime and batch label, finite values only.
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Legend font sizes shared by all three panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: cells coloured by batch.
p_mnn_denoised_hvg_1 <- plot_cells(cds, color_cells_by = "dataset_batch",
                                   graph_label_size = 0, alpha = 1, cell_size = 0.6) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: cells coloured by rescaled pseudotime, branch points labelled.
p_mnn_denoised_hvg_2 <- plot_cells(cds, color_cells_by = "Pseudotime", label_branch_points = TRUE,
                                   graph_label_size = 2, alpha = 1, cell_size = 0.6) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of rescaled pseudotime.
p_mnn_denoised_hvg_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange without drawing; the figure is printed by the following chunk.
p_monocle_mnn_denoised_hvg <- egg::ggarrange(
  p_mnn_denoised_hvg_1, p_mnn_denoised_hvg_2, p_mnn_denoised_hvg_3,
  ncol = 3, draw = FALSE
)
```

```{r, fig.height=5,fig.width=18}
p_monocle_mnn_denoised_hvg
```

```{r}
# Per-cell table (pseudotime, marker expression, UMAP, batch) for the
# FCGR3A / S100A8 feature plots of MNN (HVGs).
# cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
# df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# The transposed matrix is cells x genes; multiplying by mtx_sizefactor
# recycles that vector down each column.
df0 <- data.frame(cbind(
  pseudotime = pData(cds)$Pseudotime,
  t(cds_exprs) * mtx_sizefactor
))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Keep cells with a finite pseudotime, sorted in ascending order.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
# x is pseudotime rescaled by the maximum among the retained cells.
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$mnn_denoised_hvg <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
# Feature-plot figure for the table assembled above (MNN, HVGs).
p <- get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests on normalised pseudotime:
# T1 vs T3, T1 vs T2 and T2 vs T3. Each result is printed.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Row 12 of Stable5 (columns 2-4): get_p_new() summary of the three KS
# p-values and statistics for this method.
Stable5[12, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1,
  3
)
```


### All genes denoised

```{r}
# Load the MNN result for all genes and prepare the monocle3 inputs.
output <- readRDS("../final_processed_results/MNN_corrected_all.rds")
# NOTE(review): direct slot access to the "corrected" entry; whether this
# works depends on the installed SummarizedExperiment version - confirm.
mtx <- output@assays$data$corrected

cell.meta.data <- colData(output)
# Translate batch_label through the maprules lookup (names -> values).
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  names(maprules),
  maprules
)
# Gene annotation keyed by de-duplicated row names of the matrix.
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(mtx)),
  row.names = make.unique(rownames(mtx))
)

# colnames(mtx) <- colData(output)$cellname
# rownames(mtx) <- rownames(gene_ann)
# mtx_sizefactor <- 1e4 / colSums(mtx)
```

```{r}
# mtx <- mtx[gene_ann$VarianceType == "HVG", ]
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)
## Step 1: Normalize and pre-process the data (log norm, 32 PCA dimensions)
cds <- preprocess_cds(cds, num_dim = 32, method = "PCA", norm_method = "log")
# tmp0 <- py_to_r(adata$obsm["X_dca_latent"])
# colnames(tmp0) <- paste0("PC", 1:ncol(tmp0))
# reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (disabled)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)
## Step 3: Reduce the dimensions using UMAP on the PCA coordinates
cds <- reduce_dimension(
  cds,
  reduction_method = "UMAP",
  preprocess_method = "PCA"
)

## Step 4: Cluster the cells (Leiden on the UMAP embedding)
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden")

## Step 5: Learn a graph
# For the rest of the code to run, the graph should be connected
# (fully, or at least within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
# Copy the UMAP cluster labels into colData so plot_cells() can colour by them.
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(
  cds,
  color_cells_by = "partition",
  label_cell_groups = FALSE
) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
# Two panels placed in a three-column grid.
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r, fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells along the learned trajectory
# The root principal node(s) are picked by the file's
# get_earliest_principal_node() helper for clusters "6", "3" and "5".
# NOTE(review): these cluster labels are hard-coded; they only stay valid if
# the clustering above reproduces the same numbering - confirm after a re-run.
ids <- get_earliest_principal_node(cds, cluster = c("6", "3", "5"))
cds <- order_cells(cds, root_pr_nodes = ids)
# plot_cells(cds, color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Normalise pseudotime and build the three summary panels for MNN (all genes):
# batch-coloured UMAP, pseudotime-coloured UMAP and per-batch density.
colData(cds)$pseudotime <- pseudotime(cds)
# Scale by the largest *finite* pseudotime. Dividing by max() over all values
# turns every finite value into 0 (and Inf into NaN) as soon as one cell has
# infinite pseudotime (monocle3 reports Inf for cells unreachable from the
# root), and the filter below could then never remove anything.
pt_raw <- colData(cds)$pseudotime
colData(cds)$Pseudotime <- pt_raw / max(pt_raw[is.finite(pt_raw)])
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
# Only cells with a finite normalised pseudotime enter the density panel.
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Shared legend text sizes for all panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: UMAP coloured by batch; legend keys are enlarged for readability.
p_mnn_denoised_all_1 <- plot_cells(
  cds,
  color_cells_by = "dataset_batch",
  graph_label_size = 0,
  alpha = 1,
  cell_size = 0.6
) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: UMAP coloured by normalised pseudotime with branch points labelled.
p_mnn_denoised_all_2 <- plot_cells(
  cds,
  color_cells_by = "Pseudotime",
  label_branch_points = TRUE,
  graph_label_size = 2,
  alpha = 1,
  cell_size = 0.6
) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of normalised pseudotime.
p_mnn_denoised_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels side by side without drawing them yet.
p_monocle_mnn_denoised_all <- egg::ggarrange(
  p_mnn_denoised_all_1,
  p_mnn_denoised_all_2,
  p_mnn_denoised_all_3,
  ncol = 3,
  draw = FALSE
)
```

```{r, fig.height=5,fig.width=18}
p_monocle_mnn_denoised_all
```

```{r}
# Per-cell table (pseudotime, marker expression, UMAP, batch) for the
# FCGR3A / S100A8 feature plots of MNN (all genes).
# cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
# df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# The transposed matrix is cells x genes; multiplying by mtx_sizefactor
# recycles that vector down each column.
# NOTE(review): the mtx_sizefactor assignment in this section's setup chunk is
# commented out, so the value left by an earlier chunk is used - confirm it
# matches these cells.
df0 <- data.frame(cbind(
  pseudotime = pData(cds)$Pseudotime,
  t(cds_exprs) * mtx_sizefactor
))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Keep cells with a finite pseudotime, sorted in ascending order.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
# x is pseudotime rescaled by the maximum among the retained cells.
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$mnn_denoised_all <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
# Feature-plot figure for the table assembled above (MNN, all genes).
p <- get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests on normalised pseudotime:
# T1 vs T3, T1 vs T2 and T2 vs T3. Each result is printed.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Row 13 of Stable5 (columns 2-4): get_p_new() summary of the three KS
# p-values and statistics for this method.
Stable5[13, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1,
  3
)
```




# Monocle3 using scanorama


```{r}
# Read the Scanorama AnnData result through the `ad` Python module handle.
adata <- ad$read_h5ad("../final_processed_results/scanorama Results/adata_ALL.h5ad")
```

```{r}
# Convert the AnnData pieces into the R inputs used by monocle3.
cell.meta.data <- py_to_r(adata$obs)
# Translate batch_label through the maprules lookup (names -> values).
cell.meta.data$dataset_batch <- plyr::mapvalues(
  cell.meta.data$batch_label,
  names(maprules),
  maprules
)
# NOTE(review): gene names are taken from adata$raw$var while the matrix
# below comes from adata$X; this assumes both list the same genes in the
# same order - confirm.
gene_ann0 <- py_to_r(adata$raw$var)
gene_ann <- data.frame(
  gene_short_name = make.unique(rownames(gene_ann0)),
  row.names = make.unique(rownames(gene_ann0))
)
# Transpose so that rows are genes and columns are cells.
mtx <- t(py_to_r(adata$X$tocsc())) # adata$raw
colnames(mtx) <- cell.meta.data$cellname
rownames(mtx) <- rownames(gene_ann)
# Per-cell factor that rescales each column total to 1e4.
mtx_sizefactor <- 1e4 / colSums(mtx)
```

## Using latent

```{r}
# mtx <- mtx[gene_ann$VarianceType == "HVG", ]
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)
## Step 1: Normalize and pre-process the data (log norm, 50 PCA dimensions)
cds <- preprocess_cds(cds, num_dim = 50, method = "PCA", norm_method = "log")

# Overwrite the PCA slot with the "X_scanorama" embedding so that the UMAP
# step below runs on it instead of on monocle3's own PCA.
tmp0 <- py_to_r(adata$obsm["X_scanorama"])
colnames(tmp0) <- paste0("PC", seq_len(ncol(tmp0)))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (disabled)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)
## Step 3: Reduce the dimensions using UMAP on the PCA slot
cds <- reduce_dimension(
  cds,
  reduction_method = "UMAP",
  preprocess_method = "PCA"
)

## Step 4: Cluster the cells (Leiden on the UMAP embedding)
cds <- cluster_cells(cds, reduction_method = "UMAP", cluster_method = "leiden")

## Step 5: Learn a graph
# For the rest of the code to run, the graph should be connected
# (fully, or at least within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
# Copy the UMAP cluster labels into colData so plot_cells() can colour by them.
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(
  cds,
  color_cells_by = "partition",
  label_cell_groups = FALSE
) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
# Two panels placed in a three-column grid.
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r, fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells along the learned trajectory
# The root principal node is picked by the file's
# get_earliest_principal_node() helper for cluster "2".
# NOTE(review): the cluster label must be specified by hand and only stays
# valid if the clustering above reproduces the same numbering - confirm.
ids <- get_earliest_principal_node(cds, cluster = c("2")) # need to specify
cds <- order_cells(cds, root_pr_nodes = ids)
# plot_cells(cds, color_cells_by = "pseudotime")
```

```{r,fig.height=5,fig.width=18}
# Normalise pseudotime and build the three summary panels for Scanorama
# (latent): batch-coloured UMAP, pseudotime-coloured UMAP, per-batch density.
colData(cds)$pseudotime <- pseudotime(cds)
# Scale by the largest *finite* pseudotime. Dividing by max() over all values
# turns every finite value into 0 (and Inf into NaN) as soon as one cell has
# infinite pseudotime (monocle3 reports Inf for cells unreachable from the
# root), and the filter below could then never remove anything.
pt_raw <- colData(cds)$pseudotime
colData(cds)$Pseudotime <- pt_raw / max(pt_raw[is.finite(pt_raw)])
# saveRDS(cds, file = "cds_scanorama.rds")
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
# Only cells with a finite normalised pseudotime enter the density panel.
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Shared legend text sizes for all panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: UMAP coloured by batch; legend keys are enlarged for readability.
p_monocle_scanorama_latent_hvg_1 <- plot_cells(
  cds,
  color_cells_by = "dataset_batch",
  graph_label_size = 0,
  alpha = 1,
  cell_size = 0.6
) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: UMAP coloured by normalised pseudotime with branch points labelled.
p_monocle_scanorama_latent_hvg_2 <- plot_cells(
  cds,
  color_cells_by = "Pseudotime",
  label_branch_points = TRUE,
  graph_label_size = 2,
  alpha = 1,
  cell_size = 0.6
) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of normalised pseudotime.
p_monocle_scanorama_latent_hvg_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels side by side without drawing them yet.
p_monocle_scanorama_latent_hvg <- egg::ggarrange(
  p_monocle_scanorama_latent_hvg_1,
  p_monocle_scanorama_latent_hvg_2,
  p_monocle_scanorama_latent_hvg_3,
  ncol = 3,
  draw = FALSE
)
```

```{r, fig.height=5,fig.width=18}
p_monocle_scanorama_latent_hvg
```


```{r}
# Per-cell table (pseudotime, marker expression, UMAP, batch) for the
# FCGR3A / S100A8 feature plots of Scanorama (latent).
# cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
# df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# NOTE(review): unlike the two Scanorama "denoised" chunks, the expression
# values here are not multiplied by mtx_sizefactor - confirm this is intended.
df0 <- data.frame(cbind(
  pseudotime = pData(cds)$Pseudotime,
  t(cds_exprs)
))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Keep cells with a finite pseudotime, sorted in ascending order.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
# x is pseudotime rescaled by the maximum among the retained cells.
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$scanorama_latent_hvg <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
# Feature-plot figure for the table assembled above (Scanorama, latent).
p <- get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```



```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests on normalised pseudotime:
# T1 vs T3, T1 vs T2 and T2 vs T3. Each result is printed.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Row 14 of Stable5 (columns 2-4): get_p_new() summary of the three KS
# p-values and statistics for this method.
Stable5[14, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1,
  3
)
```


## HVGs denoised

```{r}
# cds <- new_cell_data_set(mtx[rownames(mtx)%in%hvg_genes$genename,], cell_metadata = cell.meta.data,gene_metadata =gene_ann[gene_ann$gene_short_name%in%hvg_genes$genename,,drop=F])
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)
## Step 1: Normalize and pre-process the data (log norm, 50 PCA dimensions)
cds <- preprocess_cds(
  cds,
  num_dim = 50,
  method = "PCA",
  norm_method = "log",
  verbose = FALSE
)

# Overwrite the PCA slot with the "X_hvgpca" embedding so that the UMAP step
# below runs on it instead of on monocle3's own PCA.
tmp0 <- py_to_r(adata$obsm["X_hvgpca"])
colnames(tmp0) <- paste0("PC", seq_len(ncol(tmp0)))
reducedDims(cds)$PCA <- tmp0

## Step 2: Remove batch effects with cell alignment (disabled)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)
## Step 3: Reduce the dimensions using UMAP on the PCA slot
cds <- reduce_dimension(
  cds,
  reduction_method = "UMAP",
  preprocess_method = "PCA",
  verbose = FALSE
)

## Step 4: Cluster the cells (Leiden on the UMAP embedding)
cds <- cluster_cells(
  cds,
  reduction_method = "UMAP",
  cluster_method = "leiden",
  verbose = FALSE
)

## Step 5: Learn a graph
# For the rest of the code to run, the graph should be connected
# (fully, or at least within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
# Copy the UMAP cluster labels into colData so plot_cells() can colour by them.
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(
  cds,
  color_cells_by = "partition",
  label_cell_groups = FALSE
) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
# Two panels placed in a three-column grid.
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r, fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells along the learned trajectory
# The root principal node is picked by the file's
# get_earliest_principal_node() helper for cluster "2".
# NOTE(review): the cluster label is hard-coded; it only stays valid if the
# clustering above reproduces the same numbering - confirm after a re-run.
ids <- get_earliest_principal_node(cds, cluster = c("2"))
cds <- order_cells(cds, root_pr_nodes = ids)
# plot_cells(cds, color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Normalise pseudotime and build the three summary panels for Scanorama
# (HVGs): batch-coloured UMAP, pseudotime-coloured UMAP, per-batch density.
colData(cds)$pseudotime <- pseudotime(cds)
# Scale by the largest *finite* pseudotime. Dividing by max() over all values
# turns every finite value into 0 (and Inf into NaN) as soon as one cell has
# infinite pseudotime (monocle3 reports Inf for cells unreachable from the
# root), and the filter below could then never remove anything.
pt_raw <- colData(cds)$pseudotime
colData(cds)$Pseudotime <- pt_raw / max(pt_raw[is.finite(pt_raw)])
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
# Only cells with a finite normalised pseudotime enter the density panel.
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Shared legend text sizes for all panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: UMAP coloured by batch; legend keys are enlarged for readability.
p_monocle_scanorama_denoised_hvg_1 <- plot_cells(
  cds,
  color_cells_by = "dataset_batch",
  graph_label_size = 0,
  alpha = 1,
  cell_size = 0.6
) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: UMAP coloured by normalised pseudotime with branch points labelled.
p_monocle_scanorama_denoised_hvg_2 <- plot_cells(
  cds,
  color_cells_by = "Pseudotime",
  label_branch_points = TRUE,
  graph_label_size = 2,
  alpha = 1,
  cell_size = 0.6
) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of normalised pseudotime.
p_monocle_scanorama_denoised_hvg_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels side by side without drawing them yet.
p_monocle_scanorama_denoised_hvg <- egg::ggarrange(
  p_monocle_scanorama_denoised_hvg_1,
  p_monocle_scanorama_denoised_hvg_2,
  p_monocle_scanorama_denoised_hvg_3,
  ncol = 3,
  draw = FALSE
)
```

```{r, fig.height=5,fig.width=18}
p_monocle_scanorama_denoised_hvg
```

```{r}
# Per-cell table (pseudotime, marker expression, UMAP, batch) for the
# FCGR3A / S100A8 feature plots of Scanorama (HVGs).
# cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
# df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# The transposed matrix is cells x genes; multiplying by mtx_sizefactor
# recycles that vector down each column.
df0 <- data.frame(cbind(
  pseudotime = pData(cds)$Pseudotime,
  t(cds_exprs) * mtx_sizefactor
))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Keep cells with a finite pseudotime, sorted in ascending order.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
# x is pseudotime rescaled by the maximum among the retained cells.
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$scanorama_denoised_hvg <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
# Feature-plot figure for the table assembled above (Scanorama, HVGs).
p <- get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests on normalised pseudotime:
# T1 vs T3, T1 vs T2 and T2 vs T3. Each result is printed.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Row 15 of Stable5 (columns 2-4): get_p_new() summary of the three KS
# p-values and statistics for this method.
Stable5[15, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1,
  3
)
```


## All genes denoised

```{r}
# cds <- new_cell_data_set(mtx[rownames(mtx)%in%hvg_genes$genename,], cell_metadata = cell.meta.data,gene_metadata =gene_ann[gene_ann$gene_short_name%in%hvg_genes$genename,,drop=F])
cds <- new_cell_data_set(
  mtx,
  cell_metadata = cell.meta.data,
  gene_metadata = gene_ann
)
## Step 1: Normalize and pre-process the data (log norm, 30 PCA dimensions)
cds <- preprocess_cds(
  cds,
  num_dim = 30,
  method = "PCA",
  norm_method = "log",
  verbose = FALSE
)

# Overwrite the PCA slot with the first 30 columns of the "X_pca" embedding
# so that the UMAP step below runs on it instead of on monocle3's own PCA.
tmp0 <- py_to_r(adata$obsm["X_pca"])
colnames(tmp0) <- paste0("PC", seq_len(ncol(tmp0)))
reducedDims(cds)$PCA <- tmp0[, 1:30]

## Step 2: Remove batch effects with cell alignment (disabled)
## cds <- align_cds(cds, alignment_group = "BatchID", residual_model_formula_str = NULL)
## Step 3: Reduce the dimensions using UMAP on the PCA slot
cds <- reduce_dimension(
  cds,
  reduction_method = "UMAP",
  preprocess_method = "PCA",
  verbose = FALSE
)

## Step 4: Cluster the cells (Leiden on the UMAP embedding)
cds <- cluster_cells(
  cds,
  reduction_method = "UMAP",
  cluster_method = "leiden",
  verbose = FALSE
)

## Step 5: Learn a graph
# For the rest of the code to run, the graph should be connected
# (fully, or at least within each partition).
cds <- learn_graph(cds, use_partition = TRUE, verbose = FALSE)
# Copy the UMAP cluster labels into colData so plot_cells() can colour by them.
colData(cds)$clusters <- cds@clusters$UMAP$clusters
p1 <- plot_cells(
  cds,
  color_cells_by = "partition",
  label_cell_groups = FALSE
) +
  theme(legend.position = "top")
p2 <- plot_cells(
  cds,
  color_cells_by = "clusters",
  label_cell_groups = FALSE,
  graph_label_size = 2,
  label_leaves = FALSE,
  label_branch_points = FALSE
) +
  theme(legend.position = "top")
# Two panels placed in a three-column grid.
p <- cowplot::plot_grid(p1, p2, align = "h", ncol = 3)
```

```{r, fig.height=5,fig.width=18}
p
```

```{r}
## Step 6: Order cells along the learned trajectory
# The root principal node is picked by the file's
# get_earliest_principal_node() helper for cluster "1".
# NOTE(review): the cluster label is hard-coded; it only stays valid if the
# clustering above reproduces the same numbering - confirm after a re-run.
ids <- get_earliest_principal_node(cds, cluster = c("1"))
cds <- order_cells(cds, root_pr_nodes = ids)
# plot_cells(cds, color_cells_by = "pseudotime")
```


```{r,fig.height=5,fig.width=18}
# Normalise pseudotime and build the three summary panels for Scanorama
# (all genes): batch-coloured UMAP, pseudotime-coloured UMAP, per-batch density.
colData(cds)$pseudotime <- pseudotime(cds)
# Scale by the largest *finite* pseudotime. Dividing by max() over all values
# turns every finite value into 0 (and Inf into NaN) as soon as one cell has
# infinite pseudotime (monocle3 reports Inf for cells unreachable from the
# root), and the filter below could then never remove anything.
pt_raw <- colData(cds)$pseudotime
colData(cds)$Pseudotime <- pt_raw / max(pt_raw[is.finite(pt_raw)])
df_den <- pData(cds)[, c("Pseudotime", "dataset_batch")]
# Only cells with a finite normalised pseudotime enter the density panel.
df_den <- as.data.frame(df_den[is.finite(df_den$Pseudotime), ])
set.seed(10)

# Shared legend text sizes for all panels.
theme_use <- theme(
  legend.text = element_text(size = 16),
  legend.title = element_text(size = 20)
)

# Panel 1: UMAP coloured by batch; legend keys are enlarged for readability.
p_monocle_scanorama_denoised_all_1 <- plot_cells(
  cds,
  color_cells_by = "dataset_batch",
  graph_label_size = 0,
  alpha = 1,
  cell_size = 0.6
) +
  guides(colour = guide_legend(override.aes = list(alpha = 0.7, size = 5))) +
  theme_use +
  theme(legend.position = "top")

# Panel 2: UMAP coloured by normalised pseudotime with branch points labelled.
p_monocle_scanorama_denoised_all_2 <- plot_cells(
  cds,
  color_cells_by = "Pseudotime",
  label_branch_points = TRUE,
  graph_label_size = 2,
  alpha = 1,
  cell_size = 0.6
) +
  theme(
    legend.position = "top",
    legend.title = element_text(vjust = 0.2),
    legend.text = element_text(angle = -50),
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1, "cm")
  ) +
  guides(color = guide_colourbar(label.position = "top")) +
  theme_use

# Panel 3: per-batch density of normalised pseudotime.
p_monocle_scanorama_denoised_all_3 <- ggplot(data = df_den) +
  geom_density(aes(x = Pseudotime, fill = dataset_batch), alpha = 0.7) +
  scale_y_continuous(expand = c(0, 0)) +
  scale_x_continuous(expand = c(0, 0)) +
  theme(legend.position = "top") +
  theme_use

# Arrange the three panels side by side without drawing them yet.
p_monocle_scanorama_denoised_all <- egg::ggarrange(
  p_monocle_scanorama_denoised_all_1,
  p_monocle_scanorama_denoised_all_2,
  p_monocle_scanorama_denoised_all_3,
  ncol = 3,
  draw = FALSE
)
```

```{r, fig.height=5,fig.width=18}
p_monocle_scanorama_denoised_all
```

```{r}
# Per-cell table (pseudotime, marker expression, UMAP, batch) for the
# FCGR3A / S100A8 feature plots of Scanorama (all genes).
# cds_exprs <- FetchData(obj0, vars = c("FCGR3A", "S100A8"))
# df0 <- data.frame(cbind(pseudotime = pData(cds)$Pseudotime, cds_exprs))
cds_exprs <- as.matrix(
  SingleCellExperiment::counts(cds)[c("FCGR3A", "S100A8"), ]
)
# The transposed matrix is cells x genes; multiplying by mtx_sizefactor
# recycles that vector down each column.
df0 <- data.frame(cbind(
  pseudotime = pData(cds)$Pseudotime,
  t(cds_exprs) * mtx_sizefactor
))
df0$UMAP_1 <- reducedDims(cds)$UMAP[, 1]
df0$UMAP_2 <- reducedDims(cds)$UMAP[, 2]
df0$BatchID <- pData(cds)$dataset_batch
# Keep cells with a finite pseudotime, sorted in ascending order.
df0 <- df0[is.finite(df0$pseudotime), ]
df0 <- df0[order(df0$pseudotime, decreasing = FALSE), , drop = FALSE]
# x is pseudotime rescaled by the maximum among the retained cells.
df0$x <- df0$pseudotime / max(df0$pseudotime)
df_pseudotime_list$scanorama_denoised_all <- df0
```

- Feature plots of `FCGR3A` and `S100A8`

```{r}
# Feature-plot figure for the table assembled above (Scanorama, all genes).
p <- get_plot4(df00 = df0)
```

```{r, fig.height=4,fig.width=18}
p
```

```{r}
# Pairwise two-sample Kolmogorov-Smirnov tests on normalised pseudotime:
# T1 vs T3, T1 vs T2 and T2 vs T3. Each result is printed.
df_den <- colData(cds)
tt1 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt1
tt2 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T1"],
  df_den$Pseudotime[df_den$dataset_batch == "T2"]
)
tt2
tt3 <- ks.test(
  df_den$Pseudotime[df_den$dataset_batch == "T2"],
  df_den$Pseudotime[df_den$dataset_batch == "T3"]
)
tt3
```

```{r}
# Row 16 of Stable5 (columns 2-4): get_p_new() summary of the three KS
# p-values and statistics for this method.
Stable5[16, 2:4] <- matrix(
  get_p_new(
    c(tt1$p.value, tt2$p.value, tt3$p.value),
    c(tt1$statistic, tt2$statistic, tt3$statistic)
  ),
  1,
  3
)
```

```{r}
suppressPackageStartupMessages(library(cowplot))
```

```{r}
# Supplementary Table 5: export the KS summary table and render it.
# col.names = NA adds an empty header cell above the row-name column;
# without it the header row has one field fewer than the data rows and the
# columns are shifted when the CSV is opened in a spreadsheet.
write.table(Stable5, file = "KS_table.csv", sep = ",", col.names = NA)
openxlsx::write.xlsx(Stable5, file = "KS_table.xlsx")

# Styled HTML table in the rendered report.
Stable5 %>%
  kable() %>%
  kable_styling()
```

# Figures

##  Main Figure (Figure 5)

```{r, fig.width=30,fig.height=25}
# Canvas size, row letters and row titles for the main figure.
fig_width <- 30
fig_height <- 25
labels <- letters[1:5]
# Method names, indexed by position in df_pseudotime_list.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)
# Methods shown in rows a-e (indices into Methods).
use_id <- c(4, 15, 10, 7, 12)

# Arrange the plots in `plist0` in one row and return them as a cowplot layer
# placed in horizontal band `plot_id` (1 = top) of a five-band canvas.
get_draw_plot <- function(plot_id = 1, plist0) {
  # Each band is one fifth of the canvas minus a small gap.
  band_height <- 1 / 5 - 0.01
  band_bottom <- 1 - plot_id / 5
  row_of_plots <- egg::ggarrange(
    plots = plist0,
    nrow = 1,
    draw = FALSE,
    newpage = FALSE
  )
  draw_plot(
    row_of_plots,
    x = 0.02,
    y = band_bottom,
    width = 0.98,
    height = band_height
  )
}

# Return c(x, y) for the panel letter of band `plot_id` (1 = top):
# x is the left edge, y sits 1/30 below the top of the band.
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 30)
}

# Return c(x, y) for the rotated method title of band `plot_id` (1 = top):
# x = 0.015, y is the band's mid-height lowered by 1/60.
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 60)
}

# Concatenate two lists of plots into one flat, unnamed list (`x` first,
# then `y`).
#
# The previous version sized the result from length(x) + length(y) but filled
# it from the fixed slices x[1:3] and y[1:2], so any other input lengths were
# recycled or misplaced. Plain concatenation gives the same result for the
# 3 + 2 case used in this file and is correct for every other length.
get_plot_list <- function(x, y) {
  unname(c(x, y))
}
# Main figure: five rows (one method each). Every row holds the batch UMAP,
# the pseudotime UMAP, the pseudotime density and elements 3-4 of
# get_plot4_sep() for that method.

# Build the five-plot list of one row; batch colours use the "Set2" palette.
main_fig_row <- function(p_batch, p_time, p_density, df_method) {
  get_plot_list(
    list(
      p_batch + scale_color_brewer(name = "dataset_batch", palette = "Set2"),
      p_time,
      p_density + scale_fill_brewer(name = "dataset_batch", palette = "Set2")
    ),
    get_plot4_sep(df_method)[c(3, 4)]
  )
}

p <- ggdraw()
p <- p + get_draw_plot(1, main_fig_row(
  p_carDEC_denoised_hvg_1,
  p_carDEC_denoised_hvg_2,
  p_carDEC_denoised_hvg_3,
  df_pseudotime_list[[4]]
))
p <- p + get_draw_plot(2, main_fig_row(
  p_monocle_scanorama_denoised_hvg_1,
  p_monocle_scanorama_denoised_hvg_2,
  p_monocle_scanorama_denoised_hvg_3,
  df_pseudotime_list[[15]]
))
p <- p + get_draw_plot(3, main_fig_row(
  p_dca_denoised_hvg_1,
  p_dca_denoised_hvg_2,
  p_dca_denoised_hvg_3,
  df_pseudotime_list[[10]]
))
p <- p + get_draw_plot(4, main_fig_row(
  p_scvi_denoised_hvg_1,
  p_scvi_denoised_hvg_2,
  p_scvi_denoised_hvg_3,
  df_pseudotime_list[[7]]
))
p <- p + get_draw_plot(5, main_fig_row(
  p_mnn_denoised_hvg_1,
  p_mnn_denoised_hvg_2,
  p_mnn_denoised_hvg_3,
  df_pseudotime_list[[12]]
))

# Add the panel letter and the rotated method name to every row.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(
      labels[i],
      x = label_xy[1],
      y = label_xy[2],
      size = 30,
      color = "black",
      hjust = 0,
      vjust = 1
    ) +
    draw_label(
      paste0(Methods[use_id[i]], collapse = ""),
      x = title_xy[1],
      y = title_xy[2],
      size = 20,
      color = "black",
      angle = 90,
      hjust = 0.5,
      vjust = 0.5
    )
}
```

```{r, fig.width=30,fig.height=25}
p
```

```{r}
# Save the main figure as PDF and as LZW-compressed TIFF (30 x 25 in).
ggsave(
  filename = "./revised_figures/CarDEC_monocyte_Main_fig1.pdf",
  plot = p,
  width = 30,
  height = 25
)
ggsave(
  filename = "./revised_figures/CarDEC_monocyte_Main_fig1.tiff",
  plot = p,
  width = 30,
  height = 25,
  compression = "lzw"
)
```


##  Supplementary Figures about monocyte

1. Raw 

```{r}
# Supplementary figure 1 (raw counts, all genes): a 3 x 3 grid.
# Row a: summary panels; rows b and c: get_plot4_sep() elements 1-2 and 3-4,
# each followed by an empty filler panel.
plist0 <- vector("list", 9)
plist0[1:3] <- list(
  p_ori_all_1 +
    scale_color_brewer(name = "dataset_batch", palette = "Set2") +
    theme_use,
  p_ori_all_2,
  p_ori_all_3 +
    scale_fill_brewer(name = "dataset_batch", palette = "Set2") +
    theme(plot.margin = unit(c(0, 0, 1, 0), "cm"))
)
plist0[4:5] <- get_plot4_sep(df_pseudotime_list[[2]])[1:2]
plist0[[6]] <- ggplot() +
  theme_void() +
  theme(plot.margin = unit(c(0, 0, 1, 0), "cm"))
plist0[7:8] <- get_plot4_sep(df_pseudotime_list[[2]])[3:4]
plist0[[9]] <- ggplot() + theme_void()

# Draw the grid on a full-size canvas and add the row letters.
p <- ggdraw() +
  draw_plot(
    egg::ggarrange(plots = plist0, ncol = 3, draw = FALSE),
    x = 0,
    y = 0,
    width = 1,
    height = 1
  ) +
  draw_label("a", x = 0, y = 1 - 0.02, size = 30, color = "black", hjust = 0, vjust = 1) +
  draw_label("b", x = 0, y = 2 / 3 - 0.03, size = 30, color = "black", hjust = 0, vjust = 1) +
  draw_label("c", x = 0, y = 1 / 3 - 0.03, size = 30, color = "black", hjust = 0, vjust = 1)
```

```{r, fig.width=18,fig.height=16}
p
```

```{r}
# Save supplementary figure 1 as PDF and as LZW-compressed TIFF (18 x 16 in).
ggsave(
  filename = "./revised_figures/CarDEC_monocyte_Supp_fig1.pdf",
  plot = p,
  width = 18,
  height = 16
)
ggsave(
  filename = "./revised_figures/CarDEC_monocyte_Supp_fig1.tiff",
  plot = p,
  width = 18,
  height = 16,
  compression = "lzw"
)
```

2. All genes

```{r,fig.width=36,fig.height=30}
# Canvas size, row letters and row titles for the "all genes" figure.
fig_width <- 30
fig_height <- 25
labels <- letters[1:5]
# Method names, indexed by position in df_pseudotime_list.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)
# Methods shown in rows a-e (indices into Methods).
use_id <- c(5, 16, 11, 8, 13)

# Arrange the plots in `plist0` in one row and return them as a cowplot layer
# placed in horizontal band `plot_id` (1 = top) of a five-band canvas.
get_draw_plot <- function(plot_id = 1, plist0) {
  # Each band is one fifth of the canvas minus a small gap.
  band_height <- 1 / 5 - 0.01
  band_bottom <- 1 - plot_id / 5
  row_of_plots <- egg::ggarrange(
    plots = plist0,
    nrow = 1,
    draw = FALSE,
    newpage = FALSE
  )
  draw_plot(
    row_of_plots,
    x = 0.02,
    y = band_bottom,
    width = 0.98,
    height = band_height
  )
}

# Return c(x, y) for the panel letter of band `plot_id` (1 = top):
# x is the left edge, y sits 1/30 below the top of the band.
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 30)
}

# Return c(x, y) for the rotated method title of band `plot_id` (1 = top):
# x = 0.015, y is the band's mid-height lowered by 1/60.
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 60)
}
# Concatenate two lists of plots into one flat, unnamed list (`x` first,
# then `y`).
#
# The previous version sized the result from length(x) + length(y) but filled
# it from the fixed slices x[1:3] and y[1:2], so any other input lengths were
# recycled or misplaced. Plain concatenation gives the same result for the
# 3 + 2 case used in this file and is correct for every other length.
get_plot_list <- function(x, y) {
  unname(c(x, y))
}

# "All genes" supplementary figure: five rows (one method each). Every row
# holds the batch UMAP, the pseudotime UMAP, the pseudotime density and
# elements 3-4 of get_plot4_sep() for that method.

# Build the five-plot list of one row; batch colours use the "Set2" palette.
all_genes_fig_row <- function(p_batch, p_time, p_density, df_method) {
  get_plot_list(
    list(
      p_batch + scale_color_brewer(name = "dataset_batch", palette = "Set2"),
      p_time,
      p_density + scale_fill_brewer(name = "dataset_batch", palette = "Set2")
    ),
    get_plot4_sep(df_method)[c(3, 4)]
  )
}

p <- ggdraw()
p <- p + get_draw_plot(1, all_genes_fig_row(
  p_carDEC_denoised_all_1,
  p_carDEC_denoised_all_2,
  p_carDEC_denoised_all_3,
  df_pseudotime_list[[5]]
))
p <- p + get_draw_plot(2, all_genes_fig_row(
  p_monocle_scanorama_denoised_all_1,
  p_monocle_scanorama_denoised_all_2,
  p_monocle_scanorama_denoised_all_3,
  df_pseudotime_list[[16]]
))
p <- p + get_draw_plot(3, all_genes_fig_row(
  p_dca_denoised_all_1,
  p_dca_denoised_all_2,
  p_dca_denoised_all_3,
  df_pseudotime_list[[11]]
))
p <- p + get_draw_plot(4, all_genes_fig_row(
  p_scVI_denoised_all_1,
  p_scVI_denoised_all_2,
  p_scVI_denoised_all_3,
  df_pseudotime_list[[8]]
))
p <- p + get_draw_plot(5, all_genes_fig_row(
  p_mnn_denoised_all_1,
  p_mnn_denoised_all_2,
  p_mnn_denoised_all_3,
  df_pseudotime_list[[13]]
))

# Add the panel letter and the rotated method name to every row.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(
      labels[i],
      x = label_xy[1],
      y = label_xy[2],
      size = 30,
      color = "black",
      hjust = 0,
      vjust = 1
    ) +
    draw_label(
      paste0(Methods[use_id[i]], collapse = ""),
      x = title_xy[1],
      y = title_xy[2],
      size = 20,
      color = "black",
      angle = 90,
      hjust = 0.5,
      vjust = 0.5
    )
}
```

```{r, fig.width=30,fig.height=25}
p
```

```{r}
# Save supplementary figure 2 as PDF and as LZW-compressed TIFF (30 x 25 in);
# limitsize = FALSE disables ggsave's size check.
ggsave(
  filename = "./revised_figures/CarDEC_monocyte_Supp_fig2.pdf",
  plot = p,
  width = 30,
  height = 25,
  limitsize = FALSE
)
ggsave(
  filename = "./revised_figures/CarDEC_monocyte_Supp_fig2.tiff",
  plot = p,
  width = 30,
  height = 25,
  limitsize = FALSE,
  compression = "lzw"
)
```


3. Latent

```{r,fig.width=36,fig.height=30}
# Canvas size, row letters and row titles for the "latent" figure.
fig_width <- 30
fig_height <- 25
labels <- letters[1:5]
# Method names, indexed by position in df_pseudotime_list.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)

# Methods shown in rows a-e (indices into Methods).
use_id <- c(3, 14, 9, 6, 1)

# Arrange the plots in `plist0` in one row and return them as a cowplot layer
# placed in horizontal band `plot_id` (1 = top) of a five-band canvas.
get_draw_plot <- function(plot_id = 1, plist0) {
  # Each band is one fifth of the canvas minus a small gap.
  band_height <- 1 / 5 - 0.01
  band_bottom <- 1 - plot_id / 5
  row_of_plots <- egg::ggarrange(
    plots = plist0,
    nrow = 1,
    draw = FALSE,
    newpage = FALSE
  )
  draw_plot(
    row_of_plots,
    x = 0.02,
    y = band_bottom,
    width = 0.98,
    height = band_height
  )
}

# Return c(x, y) for the panel letter of band `plot_id` (1 = top):
# x is the left edge, y sits 1/30 below the top of the band.
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 30)
}

# Return c(x, y) for the rotated method title of band `plot_id` (1 = top):
# x = 0.015, y is the band's mid-height lowered by 1/60.
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 60)
}
# Concatenate two lists of plots into one flat, unnamed list (`x` first,
# then `y`).
#
# The previous version sized the result from length(x) + length(y) but filled
# it from the fixed slices x[1:3] and y[1:2], so any other input lengths were
# recycled or misplaced. Plain concatenation gives the same result for the
# 3 + 2 case used in this file and is correct for every other length.
get_plot_list <- function(x, y) {
  unname(c(x, y))
}


# "Latent" supplementary figure: five rows (one method each). Every row holds
# the batch UMAP, the pseudotime UMAP, the pseudotime density and elements
# 3-4 of get_plot4_sep() for that method.

# Build the five-plot list of one row; batch colours use the "Set2" palette.
latent_fig_row <- function(p_batch, p_time, p_density, df_method) {
  get_plot_list(
    list(
      p_batch + scale_color_brewer(name = "dataset_batch", palette = "Set2"),
      p_time,
      p_density + scale_fill_brewer(name = "dataset_batch", palette = "Set2")
    ),
    get_plot4_sep(df_method)[c(3, 4)]
  )
}

p <- ggdraw()
p <- p + get_draw_plot(1, latent_fig_row(
  p_carDEC_latent_1,
  p_carDEC_latent_2,
  p_carDEC_latent_3,
  df_pseudotime_list[[3]]
))
p <- p + get_draw_plot(2, latent_fig_row(
  p_monocle_scanorama_latent_hvg_1,
  p_monocle_scanorama_latent_hvg_2,
  p_monocle_scanorama_latent_hvg_3,
  df_pseudotime_list[[14]]
))
p <- p + get_draw_plot(3, latent_fig_row(
  p_dca_latent_all_1,
  p_dca_latent_all_2,
  p_dca_latent_all_3,
  df_pseudotime_list[[9]]
))
p <- p + get_draw_plot(4, latent_fig_row(
  p_scVI_latent_all_1,
  p_scVI_latent_all_2,
  p_scVI_latent_all_3,
  df_pseudotime_list[[6]]
))
p <- p + get_draw_plot(5, latent_fig_row(
  p_ori_1,
  p_ori_2,
  p_ori_3,
  df_pseudotime_list[[1]]
))

# Add the panel letter and the rotated method name to every row.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(
      labels[i],
      x = label_xy[1],
      y = label_xy[2],
      size = 30,
      color = "black",
      hjust = 0,
      vjust = 1
    ) +
    draw_label(
      paste0(Methods[use_id[i]], collapse = ""),
      x = title_xy[1],
      y = title_xy[2],
      size = 20,
      color = "black",
      angle = 90,
      hjust = 0.5,
      vjust = 0.5
    )
}
```

```{r, fig.width=30,fig.height=25}
p
```

```{r}
# Save supplementary figure 3 as PDF and as LZW-compressed TIFF (30 x 25 in);
# limitsize = FALSE disables ggsave's size check.
ggsave(
  filename = "./revised_figures/CarDEC_monocyte_Supp_fig3.pdf",
  plot = p,
  width = 30,
  height = 25,
  limitsize = FALSE
)
ggsave(
  filename = "./revised_figures/CarDEC_monocyte_Supp_fig3.tiff",
  plot = p,
  width = 30,
  height = 25,
  limitsize = FALSE,
  compression = "lzw"
)
```

## S100A8 and FCGR3A's feature plots

1. Monocle3 UMAP of denoised counts from HVGs

```{r}
# Canvas size, row letters and row titles for the HVG feature-plot figure.
fig_width <- 15
fig_height <- 25
labels <- letters[1:13]
# Method names, indexed by position in df_pseudotime_list.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)
# Methods shown, one per row (indices into Methods).
use_id <- c(4, 15, 10, 7, 12)
# use_id <- c(3, 9, 6, 1)
# Build the canvas layer for one row of panels.
#
# The plots in `plist0` are arranged side by side in a single row with
# egg::ggarrange() (without drawing or opening a new page) and wrapped in
# draw_plot() so the row can be added to a ggdraw() canvas.
#
# plot_id: 1-based row index; the row's lower edge is placed at
#          y = 1 - plot_id / 5, i.e. the canvas is split into five bands.
# plist0:  list of plots to place in the row.
# Returns the object produced by draw_plot().
get_draw_plot <- function(plot_id = 1, plist0) {
  x <- 0.02                 # leave the left margin free for letter and title
  y <- 1 - plot_id / 5
  width <- 0.98
  height <- 1 / 5 - 0.01    # one fifth of the canvas minus a small gap
  row_grob <- egg::ggarrange(plots = plist0, nrow = 1,
                             draw = FALSE, newpage = FALSE)
  draw_plot(row_grob, x = x, y = y, width = width, height = height)
}

# Anchor point (x, y) for the panel letter of row `plot_id`.
#
# x is the left canvas edge (0); y is the lower edge of the row's band
# (1 - plot_id / 5) raised by the band height (1/5) minus 0.01.
# Returns a numeric vector c(x, y).
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 100)
}

# Anchor point (x, y) for the method title of row `plot_id`.
#
# x is fixed at 0.015; y is the lower edge of the row's band
# (1 - plot_id / 5) raised by half the band height (1/10) minus 0.01.
# Returns a numeric vector c(x, y).
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 100)
}
# Concatenate two collections of plots into one flat, unnamed list: all
# elements of `x` followed by all elements of `y`.
#
# The earlier version always copied x[1:3] and y[1:2] regardless of the actual
# lengths, which recycled or dropped elements (with warnings) for any other
# sizes. Every element of both inputs is now copied; the result for a
# 3-element `x` and 2-element `y` is unchanged.
#
# x, y: lists (or vectors) of plots.
# Returns a list of length length(x) + length(y).
get_plot_list <- function(x, y) {
  combined <- vector("list", length(x) + length(y))
  combined[seq_along(x)] <- x
  combined[length(x) + seq_along(y)] <- y
  combined
}

# Assemble the figure on an empty cowplot canvas: one row per entry of
# `use_id`, each row holding the first two plots returned by get_plot4_sep()
# for that setting (presumably the S100A8 and FCGR3A feature plots -- confirm
# against get_plot4_sep()).
# seq_along() replaces 1:length() so an empty `use_id` yields an empty canvas
# instead of iterating over c(1, 0).
p <- ggdraw()
for (i in seq_along(use_id)) {
  row_plots <- get_plot4_sep(df_pseudotime_list[[use_id[i]]])[c(1, 2)]
  p <- p + get_draw_plot(i, row_plots)
}

# Labels are added after all rows so they are drawn on top of the panels:
# a panel letter at the row's top-left and the rotated method name beside it.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(labels[i], x = label_xy[1], y = label_xy[2],
               size = 30, color = "black", hjust = 0, vjust = 1) +
    draw_label(paste0(Methods[use_id[i]], collapse = ""),
               x = title_xy[1],
               y = title_xy[2],
               size = 20,
               color = "black", angle = 90, hjust = 0.5, vjust = 0.5)
}

```

```{r fig.width=15,fig.height=25}
# Export the figure as PDF and LZW-compressed TIFF, then display it.
ggsave(filename = "./revised_figures/CarDEC_monocyte_Supp_fig4.pdf",
       plot = p, width = 15, height = 25)
ggsave(filename = "./revised_figures/CarDEC_monocyte_Supp_fig4.tiff",
       plot = p, width = 15, height = 25, compression = "lzw")
p
```

2. monocle3's UMAP of denoised counts from All genes


```{r}
# Figure dimensions; the same values are used by the rendering chunk and by
# the ggsave() calls for this figure.
fig_width <- 15
fig_height <- 25

# Panel letters available for the rows.
labels <- letters[1:13]

# Display names of all analysed settings; `use_id` indexes into this vector
# and into `df_pseudotime_list`.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)

# Settings shown in this figure, top to bottom (the "denoised All" variants).
use_id <- c(5, 16, 11, 8, 13)
# Build the canvas layer for one row of panels.
#
# The plots in `plist0` are arranged side by side in a single row with
# egg::ggarrange() (without drawing or opening a new page) and wrapped in
# draw_plot() so the row can be added to a ggdraw() canvas.
#
# plot_id: 1-based row index; the row's lower edge is placed at
#          y = 1 - plot_id / 5, i.e. the canvas is split into five bands.
# plist0:  list of plots to place in the row.
# Returns the object produced by draw_plot().
get_draw_plot <- function(plot_id = 1, plist0) {
  x <- 0.02                 # leave the left margin free for letter and title
  y <- 1 - plot_id / 5
  width <- 0.98
  height <- 1 / 5 - 0.01    # one fifth of the canvas minus a small gap
  row_grob <- egg::ggarrange(plots = plist0, nrow = 1,
                             draw = FALSE, newpage = FALSE)
  draw_plot(row_grob, x = x, y = y, width = width, height = height)
}

# Anchor point (x, y) for the panel letter of row `plot_id`.
#
# x is the left canvas edge (0); y is the lower edge of the row's band
# (1 - plot_id / 5) raised by the band height (1/5) minus 0.01.
# Returns a numeric vector c(x, y).
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 100)
}

# Anchor point (x, y) for the method title of row `plot_id`.
#
# x is fixed at 0.015; y is the lower edge of the row's band
# (1 - plot_id / 5) raised by half the band height (1/10) minus 0.01.
# Returns a numeric vector c(x, y).
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 100)
}
# Concatenate two collections of plots into one flat, unnamed list: all
# elements of `x` followed by all elements of `y`.
#
# The earlier version always copied x[1:3] and y[1:2] regardless of the actual
# lengths, which recycled or dropped elements (with warnings) for any other
# sizes. Every element of both inputs is now copied; the result for a
# 3-element `x` and 2-element `y` is unchanged.
#
# x, y: lists (or vectors) of plots.
# Returns a list of length length(x) + length(y).
get_plot_list <- function(x, y) {
  combined <- vector("list", length(x) + length(y))
  combined[seq_along(x)] <- x
  combined[length(x) + seq_along(y)] <- y
  combined
}

# Assemble the figure on an empty cowplot canvas: one row per entry of
# `use_id`, each row holding the first two plots returned by get_plot4_sep()
# for that setting (presumably the S100A8 and FCGR3A feature plots -- confirm
# against get_plot4_sep()).
# seq_along() replaces 1:length() so an empty `use_id` yields an empty canvas
# instead of iterating over c(1, 0).
p <- ggdraw()
for (i in seq_along(use_id)) {
  row_plots <- get_plot4_sep(df_pseudotime_list[[use_id[i]]])[c(1, 2)]
  p <- p + get_draw_plot(i, row_plots)
}

# Labels are added after all rows so they are drawn on top of the panels:
# a panel letter at the row's top-left and the rotated method name beside it.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(labels[i], x = label_xy[1], y = label_xy[2],
               size = 30, color = "black", hjust = 0, vjust = 1) +
    draw_label(paste0(Methods[use_id[i]], collapse = ""),
               x = title_xy[1],
               y = title_xy[2],
               size = 20,
               color = "black", angle = 90, hjust = 0.5, vjust = 0.5)
}

```

```{r fig.width=15,fig.height=25}
# Export the figure as PDF and LZW-compressed TIFF, then display it.
ggsave(filename = "./revised_figures/CarDEC_monocyte_Supp_fig5.pdf",
       plot = p, width = 15, height = 25)
ggsave(filename = "./revised_figures/CarDEC_monocyte_Supp_fig5.tiff",
       plot = p, width = 15, height = 25, compression = "lzw")
p
```


3. monocle3's UMAP based on different methods' latent representations


```{r}
# Figure dimensions; the same values are used by the rendering chunk and by
# the ggsave() calls for this figure.
fig_width <- 15
fig_height <- 25

# Panel letters available for the rows.
labels <- letters[1:13]

# Display names of all analysed settings; `use_id` indexes into this vector
# and into `df_pseudotime_list`.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)

# Settings shown in this figure, top to bottom (the latent variants followed
# by the raw HVG counts).
use_id <- c(3, 14, 9, 6, 1)
# Build the canvas layer for one row of panels.
#
# The plots in `plist0` are arranged side by side in a single row with
# egg::ggarrange() (without drawing or opening a new page) and wrapped in
# draw_plot() so the row can be added to a ggdraw() canvas.
#
# plot_id: 1-based row index; the row's lower edge is placed at
#          y = 1 - plot_id / 5, i.e. the canvas is split into five bands.
# plist0:  list of plots to place in the row.
# Returns the object produced by draw_plot().
get_draw_plot <- function(plot_id = 1, plist0) {
  x <- 0.02                 # leave the left margin free for letter and title
  y <- 1 - plot_id / 5
  width <- 0.98
  height <- 1 / 5 - 0.01    # one fifth of the canvas minus a small gap
  row_grob <- egg::ggarrange(plots = plist0, nrow = 1,
                             draw = FALSE, newpage = FALSE)
  draw_plot(row_grob, x = x, y = y, width = width, height = height)
}

# Anchor point (x, y) for the panel letter of row `plot_id`.
#
# x is the left canvas edge (0); y is the lower edge of the row's band
# (1 - plot_id / 5) raised by the band height (1/5) minus 0.01.
# Returns a numeric vector c(x, y).
get_label_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0, band_bottom + 1 / 5 - 1 / 100)
}

# Anchor point (x, y) for the method title of row `plot_id`.
#
# x is fixed at 0.015; y is the lower edge of the row's band
# (1 - plot_id / 5) raised by half the band height (1/10) minus 0.01.
# Returns a numeric vector c(x, y).
get_title_pos <- function(plot_id = 1) {
  band_bottom <- 1 - plot_id / 5
  c(0.015, band_bottom + 1 / 10 - 1 / 100)
}
# Concatenate two collections of plots into one flat, unnamed list: all
# elements of `x` followed by all elements of `y`.
#
# The earlier version always copied x[1:3] and y[1:2] regardless of the actual
# lengths, which recycled or dropped elements (with warnings) for any other
# sizes. Every element of both inputs is now copied; the result for a
# 3-element `x` and 2-element `y` is unchanged.
#
# x, y: lists (or vectors) of plots.
# Returns a list of length length(x) + length(y).
get_plot_list <- function(x, y) {
  combined <- vector("list", length(x) + length(y))
  combined[seq_along(x)] <- x
  combined[length(x) + seq_along(y)] <- y
  combined
}

# Assemble the figure on an empty cowplot canvas: one row per entry of
# `use_id`, each row holding the first two plots returned by get_plot4_sep()
# for that setting (presumably the S100A8 and FCGR3A feature plots -- confirm
# against get_plot4_sep()).
# seq_along() replaces 1:length() so an empty `use_id` yields an empty canvas
# instead of iterating over c(1, 0).
p <- ggdraw()
for (i in seq_along(use_id)) {
  row_plots <- get_plot4_sep(df_pseudotime_list[[use_id[i]]])[c(1, 2)]
  p <- p + get_draw_plot(i, row_plots)
}

# Labels are added after all rows so they are drawn on top of the panels:
# a panel letter at the row's top-left and the rotated method name beside it.
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(labels[i], x = label_xy[1], y = label_xy[2],
               size = 30, color = "black", hjust = 0, vjust = 1) +
    draw_label(paste0(Methods[use_id[i]], collapse = ""),
               x = title_xy[1],
               y = title_xy[2],
               size = 20,
               color = "black", angle = 90, hjust = 0.5, vjust = 0.5)
}

```

```{r fig.width=15,fig.height=25}
# Export the figure as PDF and LZW-compressed TIFF, then display it.
ggsave(filename = "./revised_figures/CarDEC_monocyte_Supp_fig6.pdf",
       plot = p, width = 15, height = 25)
ggsave(filename = "./revised_figures/CarDEC_monocyte_Supp_fig6.tiff",
       plot = p, width = 15, height = 25, compression = "lzw")
p
```

4. Combination of the above three figures

```{r}
# Figure dimensions; the same values are used by the rendering chunk and by
# the ggsave() calls for the combined figure.
fig_width <- 24
fig_height <- 36

# Panel letters available for the grid cells.
labels <- letters[1:16]

# Display names of all analysed settings; `use_id` indexes into this vector
# and into `df_pseudotime_list`.
Methods <- c(
  "Raw count HVGs", "Raw count All",
  "CarDEC (latent)", "CarDEC (denoised HVGs)", "CarDEC (denoised All)",
  "scVI (latent)", "scVI (denoised HVGs)", "scVI (denoised All)",
  "DCA (latent)", "DCA (denoised HVGs)", "DCA (denoised All)",
  "MNN (denoised HVGs)", "MNN (denoised All)",
  "Scanorama (latent)", "Scanorama (denoised HVGs)", "Scanorama (denoised All)"
)

# Settings shown in the combined figure, in panel order (every entry of
# `Methods` except "Raw count All").
use_id <- c(1, 3:5, 14:16, 6:13)
# Build the canvas layer for one cell of the two-column grid.
#
# The plots in `plist0` are arranged in two columns with egg::ggarrange()
# (without drawing or opening a new page) and wrapped in draw_plot() so the
# cell can be added to a ggdraw() canvas.
#
# plot_id: 1-based cell index, filled row by row; odd ids go to the left
#          column (x = 0), even ids to the right column (x = 0.5075). The
#          cell's lower edge is y = 1 - floor((plot_id + 1) / 2) / 8, i.e. the
#          canvas is split into eight bands.
# plist0:  list of plots to place in the cell.
# Returns the object produced by draw_plot().
get_draw_plot <- function(plot_id = 1, plist0) {
  x <- ifelse(plot_id %% 2 == 1, 0, 0.5075)
  y <- 1 - floor((plot_id + 1) / 2) / 8
  width <- 1 / 2 - 0.015
  height <- 1 / 8 - 0.01    # one eighth of the canvas minus a small gap
  cell_grob <- egg::ggarrange(plots = plist0, ncol = 2,
                              draw = FALSE, newpage = FALSE)
  draw_plot(cell_grob, x = x, y = y, width = width, height = height)
}

# Anchor point (x, y) for the panel letter of grid cell `plot_id`.
#
# x is the cell's left edge (0 for odd ids, 0.5075 for even ids); y is the
# cell's lower edge (1 - floor((plot_id + 1) / 2) / 8) raised by the band
# height (1/8) minus 0.01.
# Returns a numeric vector c(x, y).
get_label_pos <- function(plot_id = 1) {
  left_edge <- ifelse(plot_id %% 2 == 1, 0, 0.5075)
  cell_bottom <- 1 - floor((plot_id + 1) / 2) / 8
  c(left_edge, cell_bottom + 1 / 8 - 1 / 100)
}

# Anchor point (x, y) for the method title of grid cell `plot_id`.
#
# x is the cell's left edge (0 for odd ids, 0.5075 for even ids) shifted right
# by 0.25; y is the cell's lower edge (1 - floor((plot_id + 1) / 2) / 8)
# raised by the band height (1/8) minus 1/300.
# Returns a numeric vector c(x, y).
get_title_pos <- function(plot_id = 1) {
  title_x <- ifelse(plot_id %% 2 == 1, 0, 0.5075) + 0.5 / 2
  cell_bottom <- 1 - floor((plot_id + 1) / 2) / 8
  c(title_x, cell_bottom + 1 / 8 - 1 / 300)
}

# Assemble the combined figure on an empty cowplot canvas: one grid cell per
# entry of `use_id`, each holding the first two plots returned by
# get_plot4_sep() for that setting (presumably the S100A8 and FCGR3A feature
# plots -- confirm against get_plot4_sep()).
# seq_along() replaces 1:length() so an empty `use_id` yields an empty canvas
# instead of iterating over c(1, 0).
p <- ggdraw()
for (i in seq_along(use_id)) {
  cell_plots <- get_plot4_sep(df_pseudotime_list[[use_id[i]]])[c(1, 2)]
  p <- p + get_draw_plot(i, cell_plots)
}

# Labels are added after all cells so they are drawn on top of the panels:
# a panel letter at the cell's top-left and the method name at the position
# given by get_title_pos().
for (i in seq_along(use_id)) {
  label_xy <- get_label_pos(i)
  title_xy <- get_title_pos(i)
  p <- p +
    draw_label(labels[i], x = label_xy[1], y = label_xy[2],
               size = 18, color = "black", hjust = 0, vjust = 1) +
    draw_label(paste0(Methods[use_id[i]], collapse = ""),
               x = title_xy[1], y = title_xy[2],
               size = 25, color = "black", vjust = 1)
}

```

```{r fig.width=24,fig.height=36}
# Display the assembled combined figure `p` in the chunk output.
p
```

```{r}
# Export the combined figure as PDF and LZW-compressed TIFF.
ggsave(filename = "./revised_figures/CarDEC_monocyte_Supp_fig456_combined.pdf",
       plot = p, width = 24, height = 36)
ggsave(filename = "./revised_figures/CarDEC_monocyte_Supp_fig456_combined.tiff",
       plot = p, width = 24, height = 36, compression = "lzw")
```


```{r}
#save object 
rm(mtx)
rm(output)
rm(adata)
rm(cds)
rm(obj0)
rm(raw.data)
gc()
save.image(file="carDEC_monocyte_final_revised.RData")#include scanorama,carDEC_monocyte_final.RData not include scanorama
#load("carDEC_monocyte_final_revised.RData")
```

```{r}
#load("carDEC_monocyte_final_revised.RData")
```

```{r}
# Print the R version, platform and loaded package versions used for this run.
sessionInfo()
```
